    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
    Flag = Chain.getValue(1);
  }

  // The x86-64 ABI for returning structs by value requires that we copy
  // the sret argument into %rax for the return. We saved the argument into
  // a virtual register in the entry block, so now we copy the value out
  // and into %rax.
  if (Subtarget->is64Bit() &&
      DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
    MachineFunction &MF = DAG.getMachineFunction();
    X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
    unsigned Reg = FuncInfo->getSRetReturnReg();
    if (!Reg) {
      Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
      FuncInfo->setSRetReturnReg(Reg);
    }
    SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
    Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
    Flag = Chain.getValue(1);
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(X86ISD::RET_FLAG, dl,
                     MVT::Other, &RetOps[0], RetOps.size());
}

/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers.  This assumes that
/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
/// being lowered. It returns an SDNode with the same number of values as the
/// ISD::CALL.
SDNode *X86TargetLowering::
LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
                unsigned CallingConv, SelectionDAG &DAG) {
  DebugLoc dl = TheCall->getDebugLoc();
  bool isVarArg = TheCall->isVarArg();
  bool Is64Bit = Subtarget->is64Bit();

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);

  SmallVector<SDValue, 8> ResultVals;
  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    MVT CopyVT = VA.getValVT();
    // If this is x86-64, and we disabled SSE, we can't return FP values
    if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
        ((Is64Bit || TheCall->isInreg()) && !Subtarget->hasSSE1())) {
      cerr << "SSE register return with SSE disabled\n";
      exit(1);
    }

    // If this is a call to a function that returns an fp value on the floating
    // point stack, but where we prefer to use the value in xmm registers, copy
    // it out as F80 and use a truncate to move it from fp stack reg to xmm reg.
    if ((VA.getLocReg() == X86::ST0 ||
         VA.getLocReg() == X86::ST1) &&
        isScalarFPTypeInSSEReg(VA.getValVT())) {
      CopyVT = MVT::f80;
    }

    SDValue Val;
    if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) {
      // For x86-64, MMX values are returned in XMM0 / XMM1 except for v1i64.
      if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
        Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
                                   MVT::v2i64, InFlag).getValue(1);
        Val = Chain.getValue(0);
        Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
                          Val, DAG.getConstant(0, MVT::i64));        
      } else {
        Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
                                   MVT::i64, InFlag).getValue(1);
        Val = Chain.getValue(0);
      }
      Val = DAG.getNode(ISD::BIT_CONVERT, dl, CopyVT, Val);
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
                                 CopyVT, InFlag).getValue(1);
      Val = Chain.getValue(0);
    }
    InFlag = Chain.getValue(2);

    if (CopyVT != VA.getValVT()) {
      // Round the f80 to the right size, which also moves it to the
      // appropriate xmm register.
      Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
                        // This truncation won't change the value.
                        DAG.getIntPtrConstant(1));
    }

    ResultVals.push_back(Val);
  }

  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
                     &ResultVals[0], ResultVals.size()).getNode();
}

//===----------------------------------------------------------------------===//
//                C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//  The StdCall calling convention is standard for many Windows API routines.
//  It differs from the C calling convention only a little: the callee cleans
//  up the stack rather than the caller, and symbols are decorated. It doesn't
//  support any vector arguments.
//  For info on the fast calling convention see the Fast Calling Convention
//  (tail call) implementation, LowerX86_32FastCCCallTo.
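//  For example, a stdcall function such as int foo(int, int) is typically
//  exported under the decorated name _foo@8 (underscore plus the number of
//  argument bytes).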
/// CallIsStructReturn - Determines whether a CALL node uses struct return
/// semantics.
static bool CallIsStructReturn(CallSDNode *TheCall) {
  unsigned NumOps = TheCall->getNumArgs();
  if (!NumOps)
    return false;

  return TheCall->getArgFlags(0).isSRet();
}

/// ArgsAreStructReturn - Determines whether a FORMAL_ARGUMENTS node uses struct
/// return semantics.
static bool ArgsAreStructReturn(SDValue Op) {
  unsigned NumArgs = Op.getNode()->getNumValues() - 1;
  if (!NumArgs)
    return false;

  return cast<ARG_FLAGSSDNode>(Op.getOperand(3))->getArgFlags().isSRet();
}

/// IsCalleePop - Determines whether a CALL or FORMAL_ARGUMENTS node requires
/// the callee to pop its own arguments. Callee pop is necessary to support tail
/// calls.
bool X86TargetLowering::IsCalleePop(bool IsVarArg, unsigned CallingConv) {
  if (IsVarArg)
    return false;

  switch (CallingConv) {
  default:
    return false;
  case CallingConv::X86_StdCall:
    return !Subtarget->is64Bit();
  case CallingConv::X86_FastCall:
    return !Subtarget->is64Bit();
  case CallingConv::Fast:
    return PerformTailCallOpt;
  }
}

/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// calling convention value.
CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const {
  if (Subtarget->is64Bit()) {
    if (Subtarget->isTargetWin64())
      return CC_X86_Win64_C;
    else
      return CC_X86_64_C;
  }

  if (CC == CallingConv::X86_FastCall)
    return CC_X86_32_FastCall;
  else if (CC == CallingConv::Fast)
    return CC_X86_32_FastCC;
  else
    return CC_X86_32_C;
}

/// NameDecorationForFORMAL_ARGUMENTS - Selects the appropriate decoration to
/// apply to a MachineFunction containing a given FORMAL_ARGUMENTS node.
NameDecorationStyle
X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDValue Op) {
  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  if (CC == CallingConv::X86_FastCall)
    return FastCall;
  else if (CC == CallingConv::X86_StdCall)
    return StdCall;
  return None;
}

/// CallRequiresGOTPtrInReg - Check whether the call requires the GOT pointer
/// in a register before calling.
bool X86TargetLowering::CallRequiresGOTPtrInReg(bool Is64Bit, bool IsTailCall) {
  return !IsTailCall && !Is64Bit &&
    getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
    Subtarget->isPICStyleGOT();
}

/// CallRequiresFnAddressInReg - Check whether the call requires the function
/// address to be loaded in a register.
bool
X86TargetLowering::CallRequiresFnAddressInReg(bool Is64Bit, bool IsTailCall) {
  return !Is64Bit && IsTailCall &&
    getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
    Subtarget->isPICStyleGOT();
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
/// function parameter.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          DebugLoc dl) {
  SDValue SizeNode     = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       /*AlwaysInline=*/true, NULL, 0, NULL, 0);
}

SDValue X86TargetLowering::LowerMemArgument(SDValue Op, SelectionDAG &DAG,
                                            const CCValAssign &VA,
                                            MachineFrameInfo *MFI,
                                            unsigned CC,
                                            SDValue Root, unsigned i) {
  // Create the nodes corresponding to a load from this parameter slot.
  ISD::ArgFlagsTy Flags =
    cast<ARG_FLAGSSDNode>(Op.getOperand(3 + i))->getArgFlags();
  bool AlwaysUseMutable = (CC==CallingConv::Fast) && PerformTailCallOpt;
  // FIXME: For now, all byval parameter objects are marked mutable. This can be
  // changed with more analysis.
  // In case of tail call optimization, mark all arguments mutable, since they
  // could be overwritten by the lowering of arguments in case of a tail call.
  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
                                  VA.getLocMemOffset(), isImmutable);
Dan Gohman's avatar
Dan Gohman committed
  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
  if (Flags.isByVal())
    return FIN;

  return DAG.getLoad(VA.getValVT(), Op.getDebugLoc(), Root, FIN,
                     PseudoSourceValue::getFixedStack(FI), 0);
}

SDValue
X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  DebugLoc dl = Op.getDebugLoc();
  const Function* Fn = MF.getFunction();
  if (Fn->hasExternalLinkage() &&
      Subtarget->isTargetCygMing() &&
      Fn->getName() == "main")
    FuncInfo->setForceFramePointer(true);

  // Decorate the function name.
  FuncInfo->setDecorationStyle(NameDecorationForFORMAL_ARGUMENTS(Op));
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
  unsigned CC = MF.getFunction()->getCallingConv();
  bool Is64Bit = Subtarget->is64Bit();
  bool IsWin64 = Subtarget->isTargetWin64();

  assert(!(isVarArg && CC == CallingConv::Fast) &&
         "Var args not supported with calling convention fastcc");
  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.getNode(), CCAssignFnForNode(CC));
  SmallVector<SDValue, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();
    if (VA.isRegLoc()) {
      MVT RegVT = VA.getLocVT();
      TargetRegisterClass *RC = NULL;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else if (Is64Bit && RegVT == MVT::i64)
        RC = X86::GR64RegisterClass;
      else if (RegVT == MVT::f32)
        RC = X86::FR32RegisterClass;
      else if (RegVT == MVT::f64)
        RC = X86::FR64RegisterClass;
      else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
        RC = X86::VR128RegisterClass;
      else if (RegVT.isVector()) {
        assert(RegVT.getSizeInBits() == 64);
        if (!Is64Bit)
          RC = X86::VR64RegisterClass;     // MMX values are passed in MMXs.
        else {
          // Darwin calling convention passes MMX values in either GPRs or
          // XMMs in x86-64. Other targets pass them in memory.
          if (RegVT != MVT::v1i64 && Subtarget->hasSSE2()) {
            RC = X86::VR128RegisterClass;  // MMX values are passed in XMMs.
            RegVT = MVT::v2i64;
          } else {
            RC = X86::GR64RegisterClass;   // v1i64 values are passed in GPRs.
            RegVT = MVT::i64;
          }
        }
      } else {
        assert(0 && "Unknown argument type!");
      }

      unsigned Reg = DAG.getMachineFunction().addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT);
      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
      // Handle MMX values passed in GPRs.
      if (Is64Bit && RegVT != VA.getLocVT()) {
        if (RegVT.getSizeInBits() == 64 && RC == X86::GR64RegisterClass)
          ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), ArgValue);
        else if (RC == X86::VR128RegisterClass) {
          ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
                                 ArgValue, DAG.getConstant(0, MVT::i64));
          ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), ArgValue);
        }
      }

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());
      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, CC, Root, i));
    }
  }
  // The x86-64 ABI for returning structs by value requires that we copy
  // the sret argument into %rax for the return. Save the argument into
  // a virtual register so that we can access it from the return points.
  if (Is64Bit && DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
    MachineFunction &MF = DAG.getMachineFunction();
    X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
    unsigned Reg = FuncInfo->getSRetReturnReg();
    if (!Reg) {
      Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
      FuncInfo->setSRetReturnReg(Reg);
    }
    SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, ArgValues[0]);
    Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Root);
  }

  unsigned StackSize = CCInfo.getNextStackOffset();
  // align stack specially for tail calls
  if (PerformTailCallOpt && CC == CallingConv::Fast)
    StackSize = GetAlignedArgumentStackSize(StackSize, DAG);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    if (Is64Bit || CC != CallingConv::X86_FastCall) {
      VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
    }
    if (Is64Bit) {
      unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;

      // FIXME: We should really autogenerate these arrays
      static const unsigned GPR64ArgRegsWin64[] = {
        X86::RCX, X86::RDX, X86::R8,  X86::R9
      };
      static const unsigned XMMArgRegsWin64[] = {
        X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
      };
      static const unsigned GPR64ArgRegs64Bit[] = {
        X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
      };
      static const unsigned XMMArgRegs64Bit[] = {
        X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
        X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
      };
      const unsigned *GPR64ArgRegs, *XMMArgRegs;

      if (IsWin64) {
        TotalNumIntRegs = 4; TotalNumXMMRegs = 4;
        GPR64ArgRegs = GPR64ArgRegsWin64;
        XMMArgRegs = XMMArgRegsWin64;
      } else {
        TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
        GPR64ArgRegs = GPR64ArgRegs64Bit;
        XMMArgRegs = XMMArgRegs64Bit;
      }
      unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
                                                       TotalNumIntRegs);
      unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
                                                       TotalNumXMMRegs);

      bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
      assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
             "SSE register cannot be used when SSE is disabled!");
      assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
             "SSE register cannot be used when SSE is disabled!");
      if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
        // Kernel mode asks for SSE to be disabled, so don't push them
        // on the stack.
        TotalNumXMMRegs = 0;
      // For X86-64, if there are vararg parameters that are passed via
      // registers, then we must store them to their spots on the stack so they
      // may be loaded by dereferencing the result of va_next.
      VarArgsGPOffset = NumIntRegs * 8;
      VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16;
      RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 +
                                                 TotalNumXMMRegs * 16, 16);
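      // The register save area holds TotalNumIntRegs * 8 bytes of GPR spill
      // slots followed by TotalNumXMMRegs * 16 bytes of XMM spill slots
      // (176 bytes total for the non-Win64 x86-64 convention).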

      // Store the integer parameter registers.
      SmallVector<SDValue, 8> MemOps;
      SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
      SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
                                  DAG.getIntPtrConstant(VarArgsGPOffset));
      for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
        unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
                                     X86::GR64RegisterClass);
        SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64);
        SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN,
                       PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
        MemOps.push_back(Store);
        FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
                          DAG.getIntPtrConstant(8));
      }

      // Now store the XMM (fp + vector) parameter registers.
      FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
                        DAG.getIntPtrConstant(VarArgsFPOffset));
      for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) {
        unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs],
                                     X86::VR128RegisterClass);
        SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::v4f32);
        SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN,
                       PseudoSourceValue::getFixedStack(RegSaveFrameIndex), 0);
        MemOps.push_back(Store);
        FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
                          DAG.getIntPtrConstant(16));
      }
      if (!MemOps.empty())
          Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                             &MemOps[0], MemOps.size());
    }
  }

  // Some CCs need callee pop.
  if (IsCalleePop(isVarArg, CC)) {
    BytesToPopOnReturn  = StackSize; // Callee pops everything.
  } else {
    BytesToPopOnReturn  = 0; // Callee pops nothing.
    // If this is an sret function, the return should pop the hidden pointer.
    if (!Is64Bit && CC != CallingConv::Fast && ArgsAreStructReturn(Op))
      BytesToPopOnReturn = 4;
  }

  if (!Is64Bit) {
    RegSaveFrameIndex = 0xAAAAAAA;   // RegSaveFrameIndex is X86-64 only.
    if (CC == CallingConv::X86_FastCall)
      VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  }

  FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.getResNo());
}

SDValue
X86TargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
                                    const SDValue &StackPtr,
                                    const CCValAssign &VA,
                                    SDValue Chain,
                                    SDValue Arg, ISD::ArgFlagsTy Flags) {
  DebugLoc dl = TheCall->getDebugLoc();
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
  }

  return DAG.getStore(Chain, dl, Arg, PtrOff,
                      PseudoSourceValue::getStack(), LocMemOffset);
}

/// EmitTailCallLoadRetAddr - Emit a load of return address if tail call
/// optimization is performed and it is required.
SDValue
X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
                                           SDValue &OutRetAddr,
                                           SDValue Chain,
                                           bool IsTailCall,
                                           bool Is64Bit,
                                           int FPDiff,
                                           DebugLoc dl) {
  if (!IsTailCall || FPDiff==0) return Chain;

  // Adjust the Return address stack slot.
  MVT VT = getPointerTy();
  OutRetAddr = getReturnAddressFrameIndex(DAG);
  OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0);
  return SDValue(OutRetAddr.getNode(), 1);
}

/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
/// optimization is performed and it is required (FPDiff!=0).
static SDValue
EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
                         SDValue Chain, SDValue RetAddrFrIdx,
                         bool Is64Bit, int FPDiff, DebugLoc dl) {
  // Store the return address to the appropriate stack slot.
  if (!FPDiff) return Chain;
  // Calculate the new stack slot for the return address.
  int SlotSize = Is64Bit ? 8 : 4;
  int NewReturnAddrFI =
    MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
  MVT VT = Is64Bit ? MVT::i64 : MVT::i32;
  SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
  Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
                       PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0);
  return Chain;
}

SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain       = TheCall->getChain();
  unsigned CC         = TheCall->getCallingConv();
  bool isVarArg       = TheCall->isVarArg();
  bool IsTailCall     = TheCall->isTailCall() &&
                        CC == CallingConv::Fast && PerformTailCallOpt;
  SDValue Callee      = TheCall->getCallee();
  bool Is64Bit        = Subtarget->is64Bit();
  bool IsStructRet    = CallIsStructReturn(TheCall);
  DebugLoc dl         = TheCall->getDebugLoc();

  assert(!(isVarArg && CC == CallingConv::Fast) &&
         "Var args not supported with calling convention fastcc");

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC));
  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  if (PerformTailCallOpt && CC == CallingConv::Fast)
    NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
  int FPDiff = 0;
  if (IsTailCall) {
    // Lower arguments at fp - stackoffset + fpdiff.
    unsigned NumBytesCallerPushed =
      MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
    FPDiff = NumBytesCallerPushed - NumBytes;
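    // Note: FPDiff is negative when the callee needs more argument stack space
    // than the caller provides, so the return address slot must move down.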

    // Set the delta of movement of the returnaddr stackslot.
    // But only set if delta is greater than previous delta.
    if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
      MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
  SDValue RetAddrFrIdx;
  // Load return address for tail calls.
  Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, IsTailCall, Is64Bit,
                                  FPDiff, dl);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  // Walk the register/memloc assignments, inserting copies/loads.  In the case
  // of tail call optimization, arguments are handled later.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = TheCall->getArg(i);
    ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
    bool isByVal = Flags.isByVal();
    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      if (Is64Bit) {
        MVT RegVT = VA.getLocVT();
        if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
          switch (VA.getLocReg()) {
          default:
            break;
          case X86::RDI: case X86::RSI: case X86::RDX: case X86::RCX:
          case X86::R8: {
            // Special case: passing MMX values in GPR registers.
            Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
            break;
          }
          case X86::XMM0: case X86::XMM1: case X86::XMM2: case X86::XMM3:
          case X86::XMM4: case X86::XMM5: case X86::XMM6: case X86::XMM7: {
            // Special case: passing MMX values in XMM registers.
            Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg);
            Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
            Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
            break;
          }
          }
      }

      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      if (!IsTailCall || (IsTailCall && isByVal)) {
        if (StackPtr.getNode() == 0)
          StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy());
        MemOpChains.push_back(LowerMemOpCallTo(TheCall, DAG, StackPtr, VA,
                                               Chain, Arg, Flags));
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDValue InFlag;
  // Tail call byval lowering might overwrite argument registers so in case of
  // tail call optimization the copies to registers are lowered later.
  if (!IsTailCall)
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }
  // ELF / PIC requires GOT in the EBX register before function calls via PLT
  if (CallRequiresGOTPtrInReg(Is64Bit, IsTailCall)) {
    Chain = DAG.getCopyToReg(Chain, dl, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg,
                                         DebugLoc::getUnknownLoc(),
                                         getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }
  // If we are tail calling and generating PIC/GOT style code load the address
  // of the callee into ecx. The value in ecx is used as target of the tail
  // jump. This is done to circumvent the ebx/callee-saved problem for tail
  // calls on PIC/GOT architectures. Normally we would just put the address of
  // GOT into ebx and then call target@PLT. But for tail calls ebx would be
  // restored (since ebx is callee-saved) before jumping to the target@PLT.
  if (CallRequiresFnAddressInReg(Is64Bit, IsTailCall)) {
    // Note: The actual moving to ecx is done further down.
    GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
    if (G && !G->getGlobal()->hasHiddenVisibility() &&
        !G->getGlobal()->hasProtectedVisibility())
      Callee = LowerGlobalAddress(Callee, DAG);
    else if (isa<ExternalSymbolSDNode>(Callee))
      Callee = LowerExternalSymbol(Callee, DAG);
  }

  if (Is64Bit && isVarArg) {
    // From AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...) in
    // the declaration) %al is used as hidden argument to specify the number
    // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an upper bound on the number of SSE
    // registers used and is in the range 0 - 8 inclusive.
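    //
    // For example, a varargs call that passes one double in %xmm0 may set
    // %al to any value from 1 to 8; the code below uses the exact count.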
    // Count the number of XMM registers allocated.
    static const unsigned XMMArgRegs[] = {
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
    assert((Subtarget->hasSSE1() || !NumXMMRegs)
           && "SSE registers cannot be used when SSE is disabled");
    Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
    InFlag = Chain.getValue(1);
  }

  // For tail calls lower the arguments to the 'real' stack slot.
  if (IsTailCall) {
    SmallVector<SDValue, 8> MemOpChains2;
    SDValue FIN;
    int FI = 0;
    // Do not flag preceding copytoreg stuff together with the following stuff.
    InFlag = SDValue();
    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
      CCValAssign &VA = ArgLocs[i];
      if (!VA.isRegLoc()) {
        SDValue Arg = TheCall->getArg(i);
        ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
        // Create frame index.
        int32_t Offset = VA.getLocMemOffset()+FPDiff;
        uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
        FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
        FIN = DAG.getFrameIndex(FI, getPointerTy());
        if (Flags.isByVal()) {
          // Copy relative to framepointer.
          SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
          if (StackPtr.getNode() == 0)
            StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
                                          getPointerTy());
          Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);

          MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, Chain,
                                                           Flags, DAG, dl));
        } else {
          // Store relative to framepointer.
          MemOpChains2.push_back(
            DAG.getStore(Chain, dl, Arg, FIN,
                         PseudoSourceValue::getFixedStack(FI), 0));
        }
      }
    }

    if (!MemOpChains2.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOpChains2[0], MemOpChains2.size());

    // Copy arguments to their registers.
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }
    InFlag = SDValue();

    // Store the return address to the appropriate stack slot.
    Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit,
                                     FPDiff, dl);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy(),
                                          G->getOffset());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  } else if (IsTailCall) {
    unsigned Opc = Is64Bit ? X86::R11 : X86::EAX;

    Chain = DAG.getCopyToReg(Chain, dl,
                             DAG.getRegister(Opc, getPointerTy()),
                             Callee, InFlag);
    Callee = DAG.getRegister(Opc, getPointerTy());
    // Add register as live out.
    DAG.getMachineFunction().getRegInfo().addLiveOut(Opc);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDValue, 8> Ops;
  if (IsTailCall) {
    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                               DAG.getIntPtrConstant(0, true), InFlag);
    InFlag = Chain.getValue(1);

    // Returns a chain & a flag for retval copy to use.
    NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
    Ops.clear();
  }
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  if (IsTailCall)
    Ops.push_back(DAG.getConstant(FPDiff, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));
  // Add an implicit use GOT pointer in EBX.
  if (!IsTailCall && !Is64Bit &&
      getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  // Add an implicit use of AL for x86 vararg functions.
  if (Is64Bit && isVarArg)
    Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));

           "Flag must be set. Depend on flag being set in LowerRET");
    Chain = DAG.getNode(X86ISD::TAILCALL, dl,
                        TheCall->getVTList(), &Ops[0], Ops.size());
  Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush;
  if (IsCalleePop(isVarArg, CC))
    NumBytesForCalleeToPush = NumBytes;    // Callee pops everything
  else if (!Is64Bit && CC != CallingConv::Fast && IsStructRet)
    // If this is a call to a struct-return function, the callee
    // pops the hidden struct pointer, so we have to push it back.
    // This is common for Darwin/X86, Linux & Mingw32 targets.
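    // (In that case the callee returns with a "ret $4" instruction.)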
    NumBytesForCalleeToPush = 4;
  else
    NumBytesForCalleeToPush = 0;  // Callee pops nothing.
  // Returns a flag for retval copy to use.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(NumBytesForCalleeToPush,
                                                   true),
                             InFlag);
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDValue(LowerCallResult(Chain, InFlag, TheCall, CC, DAG),
                 Op.getResNo());
}

//===----------------------------------------------------------------------===//
//                Fast Calling Convention (tail call) implementation
//===----------------------------------------------------------------------===//

//  Like stdcall, the callee cleans up the arguments, except that ECX is
//  reserved for storing the address of the tail-called function. Only 2
//  registers are free for argument passing (inreg). Tail call optimization
//  is performed provided:
//                * tailcallopt is enabled
//                * caller/callee are fastcc
//  On X86_64 architecture with GOT-style position independent code only local
//  (within module) calls are supported at the moment.
//  To keep the stack aligned according to the platform ABI, the function
//  GetAlignedArgumentStackSize ensures that the argument delta is always a
//  multiple of the stack alignment. (Dynamic linkers need this - darwin's
//  dyld, for example.)
//  If a tail-called callee has more arguments than the caller, the caller
//  needs to make sure that there is room to move the RETADDR to. This is
//  achieved by reserving an area the size of the argument delta right after
//  the original RETADDR, but before the saved framepointer or the spilled
//  registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
//  stack layout:
//    arg1
//    arg2
//    RETADDR
//    [ new RETADDR
//      move area ]
//    (possible EBP)
//    ESI
//    EDI
//    local1 ..

/// GetAlignedArgumentStackSize - Make the stack size aligned to e.g. 16n + 12
/// bytes, so that once the return-address slot is pushed the stack meets a
/// 16-byte alignment requirement.
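/// For example, on 32-bit x86 (16-byte alignment, 4-byte return-address slot)
/// a 20-byte argument area is rounded up to 28 bytes, i.e. 16n + 12.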
unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
                                                        SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  const TargetMachine &TM = MF.getTarget();
  const TargetFrameInfo &TFI = *TM.getFrameInfo();
  unsigned StackAlignment = TFI.getStackAlignment();
  uint64_t AlignMask = StackAlignment - 1;
  int64_t Offset = StackSize;
  uint64_t SlotSize = TD->getPointerSize();
  if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
    // Number smaller than 12 so just add the difference.
    Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
  } else {
    // Mask out lower bits, add stackalignment once plus the 12 bytes.
    Offset = ((~AlignMask) & Offset) + StackAlignment +
      (StackAlignment-SlotSize);
  }
  return Offset;
}

/// IsEligibleForTailCallOptimization - Check to see whether the next
/// instruction following the call is a return. A function is eligible if
/// caller/callee calling conventions match, currently only fastcc supports
/// tail calls, and the function CALL is immediately followed by a RET.
bool X86TargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall,
                                                      SDValue Ret,
                                                      SelectionDAG& DAG) const {
  if (!PerformTailCallOpt)
    return false;

  if (CheckTailCallReturnConstraints(TheCall, Ret)) {
    MachineFunction &MF = DAG.getMachineFunction();
    unsigned CallerCC = MF.getFunction()->getCallingConv();
    unsigned CalleeCC= TheCall->getCallingConv();
    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
      SDValue Callee = TheCall->getCallee();
      // On x86/32-bit PIC/GOT, tail calls are supported.
      if (getTargetMachine().getRelocationModel() != Reloc::PIC_ ||
          !Subtarget->isPICStyleGOT() || !Subtarget->is64Bit())
        return true;

      // Can only do local tail calls (in same module, hidden or protected) on
      // x86_64 PIC/GOT at the moment.
      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
        return G->getGlobal()->hasHiddenVisibility()
            || G->getGlobal()->hasProtectedVisibility();
    }
  }

  return false;
}

FastISel *
X86TargetLowering::createFastISel(MachineFunction &mf,
                                  MachineModuleInfo *mmo,
                                  DwarfWriter *dw,
                                  DenseMap<const Value *, unsigned> &vm,
                                  DenseMap<const BasicBlock *,
                                           MachineBasicBlock *> &bm,
                                  DenseMap<const AllocaInst *, int> &am
#ifndef NDEBUG
                                  , SmallSet<Instruction*, 8> &cil
#endif
                                  ) {
  return X86::createFastISel(mf, mmo, dw, vm, bm, am
#ifndef NDEBUG
                             , cil
#endif
                             );
}

//===----------------------------------------------------------------------===//
//                           Other Lowering Hooks
//===----------------------------------------------------------------------===//


SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  int ReturnAddrIndex = FuncInfo->getRAIndex();