Skip to content
X86ISelLowering.cpp 356 KiB
Newer Older
X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
                                       bool isSrcConst, bool isSrcStr,
                                       SelectionDAG &DAG) const {
  // Choose the value type used to expand a memory operation of Size bytes:
  // a 128-bit vector type when SSE may be used and the source is a constant
  // or string of at least 16 bytes, otherwise i64 (64-bit subtargets with
  // Size >= 8) or i32.
  //
  // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
  // linux, because the stack realignment code can't handle certain cases like
  // PR2962.  Remove this restriction once PR2962 is fixed.
  const Function *Fn = DAG.getMachineFunction().getFunction();
  const bool MayUseVectorOps = !Fn->hasFnAttr(Attribute::NoImplicitFloat) &&
                               Subtarget->getStackAlignment() >= 16;
  const bool WideConstantSource = (isSrcConst || isSrcStr) && Size >= 16;

  if (MayUseVectorOps && WideConstantSource) {
    // Prefer the integer vector type when SSE2 is present; fall back to the
    // float vector type with plain SSE1.
    if (Subtarget->hasSSE2())
      return MVT::v4i32;
    if (Subtarget->hasSSE1())
      return MVT::v4f32;
  }

  if (!Subtarget->is64Bit() || Size < 8)
    return MVT::i32;
  return MVT::i64;
}

/// getPICJumpTableRelocBase - Returns relocation base for the given PIC
/// jumptable.
///
/// When usesGlobalOffsetTable() is true the base is the GLOBAL_OFFSET_TABLE
/// node; the other visible path builds an X86ISD::GlobalBaseReg node of
/// pointer type.
///
/// NOTE(review): this listing looks incomplete -- the second return is
/// indented as if it were guarded by a condition that is not shown, Table is
/// never used, and the closing brace is absent.  Confirm against the complete
/// source.
Dan Gohman's avatar
Dan Gohman committed
SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                      SelectionDAG &DAG) const {
  if (usesGlobalOffsetTable())
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy());
    // This doesn't have DebugLoc associated with it, but is not really the
    // same as a Register.
    return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc::getUnknownLoc(),
                       getPointerTy());
/// getFunctionAlignment - Return the Log2 alignment of function F: 1 when F
/// carries the OptimizeForSize attribute, 4 otherwise.
unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
  if (F->hasFnAttr(Attribute::OptimizeForSize))
    return 1;
  return 4;
}

//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

/// LowerRET - Lower an ISD::RET node.
///
/// As visible here, the lowering produces an X86ISD::TC_RETURN node when the
/// incoming chain leads to an X86ISD::TAILCALL, and otherwise an
/// X86ISD::RET_FLAG node whose operands are the chain, the number of bytes to
/// pop, and any values that are returned in ST0/ST1.  Values returned in other
/// registers are copied with CopyToReg nodes threaded through the chain.
///
/// NOTE(review): this listing is incomplete -- braces do not balance, the
/// TC_RETURN getNode call is cut off, and no call that fills RVLocs is
/// visible.  Confirm against the complete source before editing.
Dan Gohman's avatar
Dan Gohman committed
SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();
  // Operand 0 is the chain; the value for return location i is read from
  // operand i*2+1 below, so the operand count must be odd.
  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs, *DAG.getContext());
  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
Dan Gohman's avatar
Dan Gohman committed
  SDValue Chain = Op.getOperand(0);
  // If the chain leads back to a tail call, this return is rewritten into a
  // TC_RETURN that reuses the tail call's operands.
  Chain = GetPossiblePreceedingTailCall(Chain, X86ISD::TAILCALL);
  if (Chain.getOpcode() == X86ISD::TAILCALL) {
Dan Gohman's avatar
Dan Gohman committed
    SDValue TailCall = Chain;
    // Operand 1 of the tail call is its target, operand 2 the stack adjustment.
    SDValue TargetAddress = TailCall.getOperand(1);
    SDValue StackAdjustment = TailCall.getOperand(2);
    assert(((TargetAddress.getOpcode() == ISD::Register &&
               (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::EAX ||
                cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R11)) ||
              TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
              TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
             "Expecting an global address, external symbol, or register");
    assert(StackAdjustment.getOpcode() == ISD::Constant &&
           "Expecting a const value");
Dan Gohman's avatar
Dan Gohman committed
    SmallVector<SDValue,8> Operands;
    Operands.push_back(Chain.getOperand(0));
    Operands.push_back(TargetAddress);
    Operands.push_back(StackAdjustment);
    // Copy registers used by the call. Last operand is a flag so it is not
    // copied.
    for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
      Operands.push_back(Chain.getOperand(i));
    }
    // NOTE(review): the remaining arguments of this getNode call and the
    // closing brace of the tail-call branch are not present in this listing.
    return DAG.getNode(X86ISD::TC_RETURN, dl, MVT::Other, &Operands[0],
Dan Gohman's avatar
Dan Gohman committed
  // Flag is passed to the CopyToReg nodes created below.
  SDValue Flag;
Dan Gohman's avatar
Dan Gohman committed
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
  // Operand #1 = Bytes To Pop
  RetOps.push_back(DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
Dan Gohman's avatar
Dan Gohman committed
    SDValue ValToCopy = Op.getOperand(i*2+1);
    // Returns in ST0/ST1 are handled specially: these are pushed as operands to
    // the RET instruction and handled by the FP Stackifier.
    if (VA.getLocReg() == X86::ST0 ||
        VA.getLocReg() == X86::ST1) {
      // If this is a copy from an xmm register to ST(0), use an FPExtend to
      // change the value to the FP stack register class.
      if (isScalarFPTypeInSSEReg(VA.getValVT()))
        ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
      RetOps.push_back(ValToCopy);
      // Don't emit a copytoreg.
      continue;
    }
    // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
    // which is returned in RAX / RDX.
    if (Subtarget->is64Bit()) {
      MVT ValVT = ValToCopy.getValueType();
      if (ValVT.isVector() && ValVT.getSizeInBits() == 64) {
        // Reinterpret the 64-bit vector as i64; for XMM destinations widen it
        // to v2i64 so it matches the register.
        ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy);
        if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1)
          ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy);
      }
    // NOTE(review): closing braces for the 64-bit block and the loop appear to
    // be missing around this copy.
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);

  // The x86-64 ABI for returning structs by value requires that we copy
  // the sret argument into %rax for the return. We saved the argument into
  // a virtual register in the entry block, so now we copy the value out
  // and into %rax.
  if (Subtarget->is64Bit() &&
      DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
    MachineFunction &MF = DAG.getMachineFunction();
    X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
    unsigned Reg = FuncInfo->getSRetReturnReg();
    // Create the virtual register on demand if none has been recorded yet.
    if (!Reg) {
      Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
      FuncInfo->setSRetReturnReg(Reg);
    }
    SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
    Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  // NOTE(review): the statement this comment describes is not present in this
  // listing.

  return DAG.getNode(X86ISD::RET_FLAG, dl,
                     MVT::Other, &RetOps[0], RetOps.size());
/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers.  This assumes that
/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
/// being lowered.  This returns an SDNode with the same number of values as the
/// ISD::CALL.
///
/// NOTE(review): this listing is incomplete -- RVLocs and Is64Bit are used but
/// their declarations are not visible, several braces are missing, and nothing
/// visible appends Val to ResultVals or updates InFlag.  Confirm against the
/// complete source before editing.
SDNode *X86TargetLowering::
LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
                unsigned CallingConv, SelectionDAG &DAG) {
  DebugLoc dl = TheCall->getDebugLoc();
  // Assign locations to each value returned by this call.
  bool isVarArg = TheCall->isVarArg();
  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);

Dan Gohman's avatar
Dan Gohman committed
  SmallVector<SDValue, 8> ResultVals;
  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    // CopyVT is the type used for the register copy; it starts as the value's
    // own type.
    MVT CopyVT = VA.getValVT();
    // If this is x86-64, and we disabled SSE, we can't return FP values
    if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
        ((Is64Bit || TheCall->isInreg()) && !Subtarget->hasSSE1())) {
      llvm_report_error("SSE register return with SSE disabled");
    // If this is a call to a function that returns an fp value on the floating
    // point stack, but where we prefer to use the value in xmm registers, copy
    // it out as F80 and use a truncate to move it from fp stack reg to xmm reg.
    // NOTE(review): the body of this `if` is not present in this listing;
    // presumably it switches CopyVT to f80, which the FP_ROUND further down
    // relies on -- confirm.
    if ((VA.getLocReg() == X86::ST0 ||
         VA.getLocReg() == X86::ST1) &&
        isScalarFPTypeInSSEReg(VA.getValVT())) {
    SDValue Val;
    if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) {
      // For x86-64, MMX values are returned in XMM0 / XMM1 except for v1i64.
      if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
        // Copy the whole XMM register as v2i64, then take element 0 as i64.
        Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
                                   MVT::v2i64, InFlag).getValue(1);
        Val = Chain.getValue(0);
        Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
                          Val, DAG.getConstant(0, MVT::i64));        
      } else {
        Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
                                   MVT::i64, InFlag).getValue(1);
        Val = Chain.getValue(0);
      }
      // Reinterpret the i64 as the original 64-bit vector type.
      Val = DAG.getNode(ISD::BIT_CONVERT, dl, CopyVT, Val);
    } else {
      // Result 1 of the copy becomes the new chain; result 0 is the value.
      Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
                                 CopyVT, InFlag).getValue(1);
      Val = Chain.getValue(0);
    }
    if (CopyVT != VA.getValVT()) {
      // Round the F80 to the right size, which also moves to the appropriate
      // xmm register.
      Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
                        // This truncation won't change the value.
                        DAG.getIntPtrConstant(1));
    }
  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
                     &ResultVals[0], ResultVals.size()).getNode();
//===----------------------------------------------------------------------===//
//                C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//  The StdCall calling convention is the standard for many Windows API
//  routines. It differs from the C calling convention only slightly: the
//  callee, not the caller, cleans up the stack. Symbols are also decorated
//  in a convention-specific way. It does not support vector arguments.
//  For information on the fast calling convention, see the Fast Calling
//  Convention (tail call) implementation, LowerX86_32FastCCCallTo.
/// CallIsStructReturn - Determines whether a CALL node uses struct return
/// semantics, i.e. whether its first argument carries the sret flag.  A call
/// without arguments never uses struct return semantics.
static bool CallIsStructReturn(CallSDNode *TheCall) {
  // Guard the argument-0 access below: a call without arguments has no
  // argument flags to inspect.
  if (TheCall->getNumArgs() == 0)
    return false;

  return TheCall->getArgFlags(0).isSRet();
}
/// ArgsAreStructReturn - Determines whether a FORMAL_ARGUMENTS node uses struct
/// return semantics.
Dan Gohman's avatar
Dan Gohman committed
static bool ArgsAreStructReturn(SDValue Op) {
  unsigned NumArgs = Op.getNode()->getNumValues() - 1;

  return cast<ARG_FLAGSSDNode>(Op.getOperand(3))->getArgFlags().isSRet();
/// IsCalleePop - Determines whether a CALL or FORMAL_ARGUMENTS node requires
/// the callee to pop its own arguments. Callee pop is necessary to support tail
/// calls.
///
/// Returns false for variadic functions and for any calling convention not
/// listed below.  StdCall and FastCall are callee-pop on 32-bit subtargets
/// only; the Fast convention is callee-pop only when tail call optimization
/// is enabled.
bool X86TargetLowering::IsCalleePop(bool IsVarArg, unsigned CallingConv) {
  // A variadic callee does not know how many argument bytes were pushed, so
  // the caller has to clean up.
  if (IsVarArg)
    return false;

  switch (CallingConv) {
  default:
    return false;
  case CallingConv::X86_StdCall:
  case CallingConv::X86_FastCall:
    return !Subtarget->is64Bit();
  case CallingConv::Fast:
    return PerformTailCallOpt;
  }
}

/// CCAssignFnForNode - Selects the correct CCAssignFn for the
/// given CallingConvention value.
///
/// Visible selections: CC_X86_64_C on 64-bit subtargets, CC_X86_32_FastCall
/// for X86_FastCall and CC_X86_32_FastCC for Fast on 32-bit subtargets.
///
/// NOTE(review): this listing is incomplete -- the `else` below has no
/// matching `if`, the 64-bit block is never closed, and there is no final
/// return for the remaining 32-bit conventions.  Confirm against the complete
/// source.
CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const {
  if (Subtarget->is64Bit()) {
Evan Cheng's avatar
Evan Cheng committed
    else
      return CC_X86_64_C;
  // 32-bit subtargets: choose by calling convention.
  if (CC == CallingConv::X86_FastCall)
    return CC_X86_32_FastCall;
  else if (CC == CallingConv::Fast)
    return CC_X86_32_FastCC;
/// NameDecorationForFORMAL_ARGUMENTS - Selects the appropriate decoration to
/// apply to a MachineFunction containing a given FORMAL_ARGUMENTS node.
Dan Gohman's avatar
Dan Gohman committed
X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDValue Op) {
  // Operand 1 of the FORMAL_ARGUMENTS node carries the calling convention.
  const unsigned CallConv =
      cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();

  // Only the FastCall and StdCall conventions get a decoration.
  switch (CallConv) {
  case CallingConv::X86_FastCall:
    return FastCall;
  case CallingConv::X86_StdCall:
    return StdCall;
  default:
    return None;
  }
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
/// function parameter.
///
/// The copy is emitted as an always-inline memcpy on Chain, and the node that
/// getMemcpy produces is returned.
///
/// NOTE(review): the return-type line and the closing brace are not present in
/// this listing.
Dan Gohman's avatar
Dan Gohman committed
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          DebugLoc dl) {
Dan Gohman's avatar
Dan Gohman committed
  // The byval size from the argument flags, as an i32 constant.
  SDValue SizeNode     = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       /*AlwaysInline=*/true, NULL, 0, NULL, 0);
Dan Gohman's avatar
Dan Gohman committed
/// LowerMemArgument - Produce the value of formal argument number i, which VA
/// places in memory: create a fixed stack object for the slot and load the
/// argument from it.
///
/// NOTE(review): this listing is incomplete -- CC is used below but is not in
/// the visible parameter list (the call site passes it between MFI and Root),
/// the statement controlled by the byval `if` is missing, and the closing
/// brace is absent.  Confirm against the complete source.
SDValue X86TargetLowering::LowerMemArgument(SDValue Op, SelectionDAG &DAG,
                                              const CCValAssign &VA,
                                              MachineFrameInfo *MFI,
Dan Gohman's avatar
Dan Gohman committed
                                              SDValue Root, unsigned i) {
  // Create the nodes corresponding to a load from this parameter slot.
  // The flags of argument i live in operand 3 + i.
  ISD::ArgFlagsTy Flags =
    cast<ARG_FLAGSSDNode>(Op.getOperand(3 + i))->getArgFlags();
  bool AlwaysUseMutable = (CC==CallingConv::Fast) && PerformTailCallOpt;
  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
  // FIXME: For now, all byval parameter objects are marked mutable. This can be
  // changed with more analysis.
  // In case of tail call optimization mark all arguments mutable. Since they
  // could be overwritten by lowering of arguments in case of a tail call.
  // The object is sized from the value type (in bytes) and placed at the
  // offset recorded in VA.
  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
                                  VA.getLocMemOffset(), isImmutable);
Dan Gohman's avatar
Dan Gohman committed
  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
  // NOTE(review): as written, this `if` guards the load below; the statement
  // it originally controlled appears to be missing.
  if (Flags.isByVal())
  return DAG.getLoad(VA.getValVT(), Op.getDebugLoc(), Root, FIN,
                     PseudoSourceValue::getFixedStack(FI), 0);
Dan Gohman's avatar
Dan Gohman committed
/// LowerFORMAL_ARGUMENTS - Lower a FORMAL_ARGUMENTS node: materialize every
/// incoming argument from the register or stack slot assigned to it, set up
/// the bookkeeping needed for variadic functions, record how many bytes the
/// function pops on return, and merge the argument values into one node.
///
/// NOTE(review): this listing is incomplete -- several closing braces,
/// `else` keywords and statement tails are missing (flagged inline below).
/// Confirm against the complete source before editing.
SDValue
X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  DebugLoc dl = Op.getDebugLoc();
  const Function* Fn = MF.getFunction();
  // An externally visible "main" on Cygwin/MinGW targets always gets a frame
  // pointer.
  if (Fn->hasExternalLinkage() &&
      Subtarget->isTargetCygMing() &&
      Fn->getName() == "main")
    FuncInfo->setForceFramePointer(true);
Chris Lattner's avatar
Chris Lattner committed

  // Decorate the function name.
  FuncInfo->setDecorationStyle(NameDecorationForFORMAL_ARGUMENTS(Op));
  MachineFrameInfo *MFI = MF.getFrameInfo();
Dan Gohman's avatar
Dan Gohman committed
  // Operand 0 is the incoming chain; operand 2 is the vararg flag.
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
  unsigned CC = MF.getFunction()->getCallingConv();
  bool Is64Bit = Subtarget->is64Bit();
  bool IsWin64 = Subtarget->isTargetWin64();

  assert(!(isVarArg && CC == CallingConv::Fast) &&
         "Var args not supported with calling convention fastcc");
  // Assign locations to all of the incoming arguments.
Chris Lattner's avatar
Chris Lattner committed
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Op.getNode(), CCAssignFnForNode(CC));
Dan Gohman's avatar
Dan Gohman committed
  SmallVector<SDValue, 8> ArgValues;
Chris Lattner's avatar
Chris Lattner committed
  // LastVal remembers the previous value number so that a value assigned to
  // more than one location is caught by the assert below.
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();
Chris Lattner's avatar
Chris Lattner committed
    if (VA.isRegLoc()) {
      MVT RegVT = VA.getLocVT();
Devang Patel's avatar
 
Devang Patel committed
      // Pick the register class matching the location type.
      TargetRegisterClass *RC = NULL;
Chris Lattner's avatar
Chris Lattner committed
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else if (Is64Bit && RegVT == MVT::i64)
Chris Lattner's avatar
Chris Lattner committed
        RC = X86::GR64RegisterClass;
Dale Johannesen's avatar
Dale Johannesen committed
      else if (RegVT == MVT::f32)
Chris Lattner's avatar
Chris Lattner committed
        RC = X86::FR32RegisterClass;
Dale Johannesen's avatar
Dale Johannesen committed
      else if (RegVT == MVT::f64)
Chris Lattner's avatar
Chris Lattner committed
        RC = X86::FR64RegisterClass;
      else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
Evan Cheng's avatar
Evan Cheng committed
        RC = X86::VR128RegisterClass;
      else if (RegVT.isVector()) {
        assert(RegVT.getSizeInBits() == 64);
Evan Cheng's avatar
Evan Cheng committed
        if (!Is64Bit)
          RC = X86::VR64RegisterClass;     // MMX values are passed in MMXs.
        else {
          // Darwin calling convention passes MMX values in either GPRs or
          // XMMs in x86-64. Other targets pass them in memory.
          if (RegVT != MVT::v1i64 && Subtarget->hasSSE2()) {
            RC = X86::VR128RegisterClass;  // MMX values are passed in XMMs.
            RegVT = MVT::v2i64;
          } else {
            RC = X86::GR64RegisterClass;   // v1i64 values are passed in GPRs.
            RegVT = MVT::i64;
          }
        }
      } else {
        llvm_unreachable("Unknown argument type!");
      // NOTE(review): the closing brace of the `else` above is not present in
      // this listing.
      // Mark the physical register live-in and read it through a virtual
      // register.
      unsigned Reg = DAG.getMachineFunction().addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT);
Chris Lattner's avatar
Chris Lattner committed
      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
Chris Lattner's avatar
Chris Lattner committed
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
Chris Lattner's avatar
Chris Lattner committed
                               DAG.getValueType(VA.getValVT()));
Chris Lattner's avatar
Chris Lattner committed
      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
      // Handle MMX values passed in GPRs.
      if (Is64Bit && RegVT != VA.getLocVT()) {
        if (RegVT.getSizeInBits() == 64 && RC == X86::GR64RegisterClass)
          ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), ArgValue);
        else if (RC == X86::VR128RegisterClass) {
          // Passed in an XMM register: take element 0 as i64, then
          // reinterpret it as the original type.
          ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
                                 ArgValue, DAG.getConstant(0, MVT::i64));
          ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), ArgValue);
Chris Lattner's avatar
Chris Lattner committed
      // NOTE(review): closing braces for the two blocks above are not present
      // in this listing.
      ArgValues.push_back(ArgValue);
    } else {
      // Argument lives on the stack.
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, CC, Root, i));
Chris Lattner's avatar
Chris Lattner committed
    }
  }
  // The x86-64 ABI for returning structs by value requires that we copy
  // the sret argument into %rax for the return. Save the argument into
  // a virtual register so that we can access it from the return points.
  if (Is64Bit && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
    MachineFunction &MF = DAG.getMachineFunction();
    X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
    unsigned Reg = FuncInfo->getSRetReturnReg();
    if (!Reg) {
      Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
      FuncInfo->setSRetReturnReg(Reg);
    }
    // The first argument value is the one saved; the copy is tied into Root
    // with a TokenFactor.
    SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, ArgValues[0]);
    Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Root);
Chris Lattner's avatar
Chris Lattner committed
  // NOTE(review): the closing brace of the sret block is not present in this
  // listing.
  unsigned StackSize = CCInfo.getNextStackOffset();
  // align stack specially for tail calls
Evan Cheng's avatar
Evan Cheng committed
  if (PerformTailCallOpt && CC == CallingConv::Fast)
    StackSize = GetAlignedArgumentStackSize(StackSize, DAG);

Chris Lattner's avatar
Chris Lattner committed
  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    if (Is64Bit || CC != CallingConv::X86_FastCall) {
      VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
      // NOTE(review): the register-save code that follows uses 64-bit register
      // lists only; a closing brace and a 64-bit guard appear to be missing
      // between here and there -- confirm.
      unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;

      // FIXME: We should really autogenerate these arrays
      static const unsigned GPR64ArgRegsWin64[] = {
        X86::RCX, X86::RDX, X86::R8,  X86::R9
      };
      static const unsigned XMMArgRegsWin64[] = {
        X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
      static const unsigned GPR64ArgRegs64Bit[] = {
        X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
      };
      static const unsigned XMMArgRegs64Bit[] = {
        X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
        X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
      };
      const unsigned *GPR64ArgRegs, *XMMArgRegs;

      // Select the register lists and their lengths for the target.
      if (IsWin64) {
        TotalNumIntRegs = 4; TotalNumXMMRegs = 4;
        GPR64ArgRegs = GPR64ArgRegsWin64;
        XMMArgRegs = XMMArgRegsWin64;
      } else {
        TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
        GPR64ArgRegs = GPR64ArgRegs64Bit;
        XMMArgRegs = XMMArgRegs64Bit;
      }
      // Index of the first register in each list that the calling-convention
      // analysis left unallocated.
      unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
                                                       TotalNumIntRegs);
      unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
                                                       TotalNumXMMRegs);

      bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
      assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
             "SSE register cannot be used when SSE is disabled!");
      assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
             "SSE register cannot be used when SSE is disabled!");
      if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
        // Kernel mode asks for SSE to be disabled, so don't push them
        // on the stack.
        TotalNumXMMRegs = 0;
      // For X86-64, if there are vararg parameters that are passed via
      // registers, then we must store them to their spots on the stack so they
      // may be loaded by dereferencing the result of va_next.
      // The save area holds 8 bytes per integer register followed by 16 bytes
      // per XMM register; the two offsets point at the first unused register
      // of each kind.
      VarArgsGPOffset = NumIntRegs * 8;
      VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16;
      RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 +
                                                 TotalNumXMMRegs * 16, 16);

      // Store the integer parameter registers.
Dan Gohman's avatar
Dan Gohman committed
      SmallVector<SDValue, 8> MemOps;
      SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
      SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
                                  DAG.getIntPtrConstant(VarArgsGPOffset));
      for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
        unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
                                     X86::GR64RegisterClass);
        SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64);
Dan Gohman's avatar
Dan Gohman committed
        SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN,
                       PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
        // Advance to the next 8-byte slot.
        FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
                          DAG.getIntPtrConstant(8));
      // NOTE(review): nothing visible records Store in MemOps, and the closing
      // brace of the loop above is not present in this listing.
      // Now store the XMM (fp + vector) parameter registers.
      FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
                        DAG.getIntPtrConstant(VarArgsFPOffset));
      for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
        unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
                                     X86::VR128RegisterClass);
        SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::v4f32);
Dan Gohman's avatar
Dan Gohman committed
        SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN,
                       PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
        // Advance to the next 16-byte slot.
        FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
                          DAG.getIntPtrConstant(16));
          // NOTE(review): the remainder of this TokenFactor call and the
          // surrounding braces are not present in this listing.
          Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Chris Lattner's avatar
Chris Lattner committed
    }
  // NOTE(review): the `else` that separates the two assignments below, and the
  // statement controlled by the sret `if`, are not present in this listing.
  if (IsCalleePop(isVarArg, CC)) {
    BytesToPopOnReturn  = StackSize; // Callee pops everything.
    BytesToPopOnReturn  = 0; // Callee pops nothing.
    // If this is an sret function, the return should pop the hidden pointer.
    if (!Is64Bit && CC != CallingConv::Fast && ArgsAreStructReturn(Op))

  if (!Is64Bit) {
    RegSaveFrameIndex = 0xAAAAAAA;   // RegSaveFrameIndex is X86-64 only.
    // NOTE(review): the condition tests X86_FastCall while the comment on the
    // assignment speaks of fastcc -- confirm which is intended.
    if (CC == CallingConv::X86_FastCall)
      VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  }

  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
Dan Gohman's avatar
Dan Gohman committed
/// LowerMemOpCallTo - Place outgoing call argument Arg in its stack slot at
/// StackPtr plus the location's memory offset.  A byval argument is copied
/// with CreateCopyOfByValArgument; any other argument is written with a store
/// on Chain.
///
/// NOTE(review): this listing is incomplete -- VA is used below but is not in
/// the visible parameter list, and the closing braces of the byval block and
/// of the function are absent.  Confirm against the complete source.
SDValue
X86TargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
Dan Gohman's avatar
Dan Gohman committed
                                    const SDValue &StackPtr,
Dan Gohman's avatar
Dan Gohman committed
                                    SDValue Chain,
                                    SDValue Arg, ISD::ArgFlagsTy Flags) {
  DebugLoc dl = TheCall->getDebugLoc();
  unsigned LocMemOffset = VA.getLocMemOffset();
Dan Gohman's avatar
Dan Gohman committed
  // Destination address = StackPtr + LocMemOffset.
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
  return DAG.getStore(Chain, dl, Arg, PtrOff,
                      PseudoSourceValue::getStack(), LocMemOffset);
/// EmitTailCallLoadRetAddr - Emit a load of return address if tail call
/// optimization is performed and it is required.
///
/// When IsTailCall is set and FPDiff is non-zero, OutRetAddr receives the
/// loaded return address and the load's output chain is returned.  Otherwise
/// Chain is returned unchanged and OutRetAddr is left untouched.
SDValue
X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
                                           SDValue &OutRetAddr,
                                           SDValue Chain,
                                           bool IsTailCall,
                                           bool Is64Bit,
                                           int FPDiff,
                                           DebugLoc dl) {
  if (!IsTailCall || FPDiff==0) return Chain;

  // Load the return address from its current stack slot.
  MVT VT = getPointerTy();
  OutRetAddr = getReturnAddressFrameIndex(DAG);
  OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0);

  // Result 1 of the load is its output chain; hand it back so that later
  // nodes are ordered after the load.
  return OutRetAddr.getValue(1);
}

/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
/// optimization is performed and it is required (FPDiff!=0).
///
/// RetAddrFrIdx is the value to store.  It is written to a new fixed stack
/// object of one slot (8 bytes on 64-bit, 4 bytes otherwise) placed at offset
/// FPDiff - SlotSize.  Returns the chain of the store, or Chain unchanged when
/// FPDiff is zero.
static SDValue
EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
                         SDValue Chain, SDValue RetAddrFrIdx,
                         bool Is64Bit, int FPDiff, DebugLoc dl) {
  // Store the return address to the appropriate stack slot.
  if (!FPDiff) return Chain;
  // Calculate the new stack slot for the return address.
  int SlotSize = Is64Bit ? 8 : 4;
  int NewReturnAddrFI =
    MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
  MVT VT = Is64Bit ? MVT::i64 : MVT::i32;
  SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
  Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
                       PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0);
  return Chain;
}
Dan Gohman's avatar
Dan Gohman committed
SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain       = TheCall->getChain();
  unsigned CC         = TheCall->getCallingConv();
  bool isVarArg       = TheCall->isVarArg();
  bool IsTailCall     = TheCall->isTailCall() &&
                        CC == CallingConv::Fast && PerformTailCallOpt;
  SDValue Callee      = TheCall->getCallee();
  bool Is64Bit        = Subtarget->is64Bit();
  bool IsStructRet    = CallIsStructReturn(TheCall);
  DebugLoc dl         = TheCall->getDebugLoc();

  assert(!(isVarArg && CC == CallingConv::Fast) &&
         "Var args not supported with calling convention fastcc");

  // Analyze operands of the call, assigning locations to each operand.
Chris Lattner's avatar
Chris Lattner committed
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC));
Chris Lattner's avatar
Chris Lattner committed
  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  if (PerformTailCallOpt && CC == CallingConv::Fast)
    NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
  int FPDiff = 0;
  if (IsTailCall) {
    // Lower arguments at fp - stackoffset + fpdiff.
    unsigned NumBytesCallerPushed =
      MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
    FPDiff = NumBytesCallerPushed - NumBytes;

    // Set the delta of movement of the returnaddr stackslot.
    // But only set if delta is greater than previous delta.
    if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
      MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
Dan Gohman's avatar
Dan Gohman committed
  SDValue RetAddrFrIdx;
  // Load return address for tail calls.
  Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, IsTailCall, Is64Bit,
Dan Gohman's avatar
Dan Gohman committed
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  // Walk the register/memloc assignments, inserting copies/loads.  In the case
  // of tail call optimization arguments are handled later.
Chris Lattner's avatar
Chris Lattner committed
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = TheCall->getArg(i);
    ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
    bool isByVal = Flags.isByVal();
Chris Lattner's avatar
Chris Lattner committed
    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
Chris Lattner's avatar
Chris Lattner committed
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
Chris Lattner's avatar
Chris Lattner committed
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
Chris Lattner's avatar
Chris Lattner committed
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
Chris Lattner's avatar
Chris Lattner committed
    if (VA.isRegLoc()) {
        MVT RegVT = VA.getLocVT();
        if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
          switch (VA.getLocReg()) {
          default:
            break;
          case X86::RDI: case X86::RSI: case X86::RDX: case X86::RCX:
          case X86::R8: {
            // Special case: passing MMX values in GPR registers.
            Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
            break;
          }
          case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
          case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7: {
            // Special case: passing MMX values in XMM registers.
            Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
            Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
            Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
Chris Lattner's avatar
Chris Lattner committed
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      if (!IsTailCall || (IsTailCall && isByVal)) {
          StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
        MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA,
                                               Chain, Arg, Flags));
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
Dan Gohman's avatar
Dan Gohman committed
  SDValue InFlag;
  // Tail call byval lowering might overwrite argument registers so in case of
  // tail call optimization the copies to registers are lowered later.
  if (!IsTailCall)
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }
  if (Subtarget->isPICStyleGOT()) {
    // ELF / PIC requires GOT in the EBX register before function calls via PLT
    // GOT pointer.
    if (!IsTailCall) {
      Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
                               DAG.getNode(X86ISD::GlobalBaseReg,
                                           DebugLoc::getUnknownLoc(),
                                           getPointerTy()),
                               InFlag);
      InFlag = Chain.getValue(1);
    } else {
      // If we are tail calling and generating PIC/GOT style code load the
      // address of the callee into ECX. The value in ecx is used as target of
      // the tail jump. This is done to circumvent the ebx/callee-saved problem
      // for tail calls on PIC/GOT architectures. Normally we would just put the
      // address of GOT into ebx and then call target@PLT. But for tail calls
      // ebx would be restored (since ebx is callee saved) before jumping to the
      // target@PLT.

      // Note: The actual moving to ECX is done further down.
      GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
      if (G && !G->getGlobal()->hasHiddenVisibility() &&
          !G->getGlobal()->hasProtectedVisibility())
        Callee = LowerGlobalAddress(Callee, DAG);
      else if (isa<ExternalSymbolSDNode>(Callee))
        Callee = LowerExternalSymbol(Callee, DAG);
Chris Lattner's avatar
Chris Lattner committed
    // From AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...) in
    // the declaration) %al is used as hidden argument to specify the number
    // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an upper bound on the number of
    // SSE registers used and is in the range 0 - 8 inclusive.
Chris Lattner's avatar
Chris Lattner committed
    // Count the number of XMM registers allocated.
    static const unsigned XMMArgRegs[] = {
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
    assert((Subtarget->hasSSE1() || !NumXMMRegs)
           && "SSE registers cannot be used when SSE is disabled");
    Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
Chris Lattner's avatar
Chris Lattner committed
                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
    InFlag = Chain.getValue(1);
  }

  // For tail calls lower the arguments to the 'real' stack slot.
Dan Gohman's avatar
Dan Gohman committed
    SmallVector<SDValue, 8> MemOpChains2;
    SDValue FIN;
    // Do not flag preceding copytoreg stuff together with the following stuff.
Dan Gohman's avatar
Dan Gohman committed
    InFlag = SDValue();
    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
      CCValAssign &VA = ArgLocs[i];
      if (!VA.isRegLoc()) {
        SDValue Arg = TheCall->getArg(i);
        ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
        // Create frame index.
        int32_t Offset = VA.getLocMemOffset()+FPDiff;
        uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
        FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
        FIN = DAG.getFrameIndex(FI, getPointerTy());
        if (Flags.isByVal()) {
Evan Cheng's avatar
Evan Cheng committed
          // Copy relative to framepointer.
Dan Gohman's avatar
Dan Gohman committed
          SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
            StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
          Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);

          MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain,
                                                           Flags, DAG, dl));
Evan Cheng's avatar
Evan Cheng committed
          // Store relative to framepointer.
            DAG.getStore(Chain, dl, Arg, FIN,
                         PseudoSourceValue::getFixedStack(FI), 0));
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOpChains2[0], MemOpChains2.size());
    // Copy arguments to their registers.
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }
Dan Gohman's avatar
Dan Gohman committed
    InFlag =SDValue();
    // Store the return address to the appropriate stack slot.
    Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if (!GV->hasDLLImportLinkage()) {
      unsigned char OpFlags = 0;
    
      // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
      // external symbols must go through the PLT in PIC mode.  If the symbol
      // has hidden or protected visibility, or if it is static or local, then
      // we don't need to use the PLT - we can directly call it.
      if (Subtarget->isTargetELF() &&
          getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
          GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
      } else if (Subtarget->isPICStyleStubAny() &&
               (GV->isDeclaration() || GV->isWeakForLinker()) &&
               Subtarget->getDarwinVers() < 9) {
        // PC-relative references to external symbols should go through $stub,
        // unless we're building with the leopard linker or later, which
        // automatically synthesizes these stubs.
        OpFlags = X86II::MO_DARWIN_STUB;
      }
      Callee = DAG.getTargetGlobalAddress(GV, getPointerTy(),
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned char OpFlags = 0;

    // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
    // symbols should go through the PLT.
    if (Subtarget->isTargetELF() &&
        getTargetMachine().getRelocationModel() == Reloc::PIC_) {
    } else if (Subtarget->isPICStyleStubAny() &&
             Subtarget->getDarwinVers() < 9) {
      // PC-relative references to external symbols should go through $stub,
      // unless we're building with the leopard linker or later, which
      // automatically synthesizes these stubs.
      OpFlags = X86II::MO_DARWIN_STUB;
    }
      
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
                                         OpFlags);
    unsigned Opc = Is64Bit ? X86::R11 : X86::EAX;
    Chain = DAG.getCopyToReg(Chain,  dl,
                             DAG.getRegister(Opc, getPointerTy()),
                             Callee,InFlag);
    Callee = DAG.getRegister(Opc, getPointerTy());
    // Add register as live out.
    DAG.getMachineFunction().getRegInfo().addLiveOut(Opc);
  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
Dan Gohman's avatar
Dan Gohman committed
  SmallVector<SDValue, 8> Ops;
    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                           DAG.getIntPtrConstant(0, true), InFlag);
    // Returns a chain & a flag for retval copy to use.
    NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
    Ops.clear();
  }
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  if (IsTailCall)
    Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
Evan Cheng's avatar
Evan Cheng committed
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));
  // Add an implicit use GOT pointer in EBX.
  if (!IsTailCall && Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  // Add an implicit use of AL for x86 vararg functions.
  if (Is64Bit && isVarArg)
    Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));

           "Flag must be set. Depend on flag being set in LowerRET");