Skip to content
X86ISelLowering.cpp 207 KiB
Newer Older
Evan Cheng's avatar
Evan Cheng committed
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(8, getPointerTy()));
    }

    // Now store the XMM (fp + vector) parameter registers.
    FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                      DAG.getConstant(VarArgsFPOffset, getPointerTy()));
    for (; NumXMMRegs != 8; ++NumXMMRegs) {
      unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                X86::VR128RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
Evan Cheng's avatar
Evan Cheng committed
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(16, getPointerTy()));
    }
    if (!MemOps.empty())
        Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                           &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  ReturnAddrIndex = 0;     // No return address slot generated yet.
  BytesToPopOnReturn = 0;  // Callee pops nothing.
  BytesCallerReserves = ArgOffset;

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
                                     Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size());
}

/// LowerX86_64CCCCallTo - Lower an outgoing call node using the x86-64 C
/// calling convention.
///
/// Operands read from Op: operand 0 is the incoming chain, operand 2 the
/// vararg flag, operand 3 the tail-call flag, operand 4 the callee, and the
/// arguments start at operand 5 with two operands per argument (only the first
/// operand of each pair is read here).
///
/// Integer arguments are assigned to up to six GPRs and scalar FP / 128-bit
/// vector arguments to up to eight XMM registers; once a register file is
/// exhausted the argument is stored relative to the stack pointer instead.
///
/// Returns the chain alone when the call produces no value; otherwise returns
/// result Op.ResNo of a MERGE_VALUES node holding the returned value(s)
/// followed by the chain.
SDOperand
X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain     = Op.getOperand(0);
  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);
  MVT::ValueType RetVT= Op.Val->getValueType(0);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;

  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;
  unsigned NumIntRegs = 0;  // Int regs used for parameter passing.
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.

  // Argument registers, indexed by the number of registers of that kind
  // already assigned. One table per integer width so that the register
  // matches the argument's value type.
  static const unsigned GPR8ArgRegs[] = {
    X86::DIL, X86::SIL, X86::DL,  X86::CL,  X86::R8B, X86::R9B
  };
  static const unsigned GPR16ArgRegs[] = {
    X86::DI,  X86::SI,  X86::DX,  X86::CX,  X86::R8W, X86::R9W
  };
  static const unsigned GPR32ArgRegs[] = {
    X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
  };
  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8,  X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  // Pass 1: compute the size of the outgoing stack area. This must mirror the
  // placement decisions made in pass 2 below.
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);
    MVT::ValueType ArgVT = Arg.getValueType();

    switch (ArgVT) {
    default: assert(0 && "Unknown value type!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
      if (NumIntRegs < 6)
        ++NumIntRegs;
      else
        NumBytes += 8;
      break;
    case MVT::f32:
    case MVT::f64:
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 8)
        NumXMMRegs++;
      else if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
        NumBytes += 8;
      else {
        // XMM arguments have to be aligned on 16-byte boundary.
        NumBytes = ((NumBytes + 15) / 16) * 16;
        NumBytes += 16;
      }
      break;
    }
  }

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  // Pass 2: assign each argument to a register or emit a store to its stack
  // slot. The register counters are reset and recomputed while walking the
  // arguments again.
  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  NumIntRegs = 0;
  NumXMMRegs = 0;
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);
    MVT::ValueType ArgVT = Arg.getValueType();

    switch (ArgVT) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
      if (NumIntRegs < 6) {
        unsigned Reg = 0;
        switch (ArgVT) {
        default: break;
        case MVT::i8:  Reg = GPR8ArgRegs[NumIntRegs];  break;
        case MVT::i16: Reg = GPR16ArgRegs[NumIntRegs]; break;
        case MVT::i32: Reg = GPR32ArgRegs[NumIntRegs]; break;
        case MVT::i64: Reg = GPR64ArgRegs[NumIntRegs]; break;
        }
        RegsToPass.push_back(std::make_pair(Reg, Arg));
        ++NumIntRegs;
      } else {
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += 8;
      }
      break;
    case MVT::f32:
    case MVT::f64:
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 8) {
        RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
        NumXMMRegs++;
      } else {
        if (ArgVT != MVT::f32 && ArgVT != MVT::f64) {
          // XMM arguments have to be aligned on 16-byte boundary.
          ArgOffset = ((ArgOffset + 15) / 16) * 16;
        }
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
          ArgOffset += 8;
        else
          ArgOffset += 16;
      }
      break;
    }
  }

  // Join all argument stores into a single chain so the call depends on every
  // one of them.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isVarArg) {
    // From AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...) in
    // the declaration) %al is used as hidden argument to specify the number
    // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an ubound on the number of SSE
    // registers used and is in the range 0 - 8 inclusive.
    Chain = DAG.getCopyToReg(Chain, X86::AL,
                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use extra load for direct calls to dllimported functions
    if (!((Subtarget->isTargetCygwin() || Subtarget->isTargetWindows()) &&
          WindowsGVRequiresExtraLoad(G->getGlobal())))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first, 
                                  RegsToPass[i].second.getValueType()));

  // InFlag is only set if at least one copy-to-reg was emitted above.
  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Close the call sequence. The second constant operand is 0 here.
  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  if (RetVT != MVT::Other)
    NodeTys.push_back(MVT::Flag);  // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(0, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  if (RetVT != MVT::Other)
    InFlag = Chain.getValue(1);
  
  // Copy the returned value(s) out of the return register(s). In each case
  // Chain is set to result 1 of the CopyFromReg node, so Chain.getValue(0)
  // names the copied value of that same node.
  std::vector<SDOperand> ResultVals;
  NodeTys.clear();
  switch (RetVT) {
  default: assert(0 && "Unknown value type to return!");
  case MVT::Other: break;
  case MVT::i8:
    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i8);
    break;
  case MVT::i16:
    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i16);
    break;
  case MVT::i32:
    Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    if (Op.Val->getValueType(1) == MVT::i64) {
      // Two i64 results: the first comes from RAX, the second from RDX.
      // FIXME: __int128 support?
      Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, X86::RDX, MVT::i64,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i64);
    } else {
      Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    // FIXME: long double support?
    Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(RetVT);
    break;
  }

  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;
  
  // Otherwise, merge everything together with a MERGE_VALUES node.
  NodeTys.push_back(MVT::Other);
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              &ResultVals[0], ResultVals.size());
  return Res.getValue(Op.ResNo);
}

//===----------------------------------------------------------------------===//
//                    Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fast' calling convention passes up to two integer arguments in
// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
// and requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as C calling convs.
//
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
//
// Note that this can be enhanced in the future to pass fp vals in registers
// (when we have a global fp allocator) and do other tricks.
//

/// HowToPassFastCCArgument - Returns how a formal argument of the specified
/// type should be passed. If it is through stack, returns the size of the stack
/// slot; if it is through integer or XMM register, returns the number of
/// integer or XMM registers that are needed.
HowToPassFastCCArgument(MVT::ValueType ObjectVT,
                        unsigned NumIntRegs, unsigned NumXMMRegs,
                        unsigned &ObjSize, unsigned &ObjIntRegs,
                        unsigned &ObjXMMRegs) {
Evan Cheng's avatar
Evan Cheng committed
  ObjIntRegs = 0;
  ObjXMMRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
      ObjIntRegs = 2;
    } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
      ObjIntRegs = 1;
      ObjSize = 8;
  case MVT::f32:
    ObjSize = 4;
    break;
  case MVT::f64:
    ObjSize = 8;
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
SDOperand
X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues()-1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  std::vector<SDOperand> ArgValues;
  // Add DAG nodes to load the arguments...  On entry to a function the stack
  // frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first nonreg argument (leftmost lexically)
Evan Cheng's avatar
Evan Cheng committed
  // [ESP + 8] -- second nonreg argument, if 1st argument is <= 4 bytes in size
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot

  // Keep track of the number of integer regs passed so far.  This can be either
  // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
  // used).
  unsigned NumIntRegs = 0;
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  
  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    unsigned ObjIntRegs = 0;
    unsigned ObjXMMRegs = 0;

    HowToPassFastCCArgument(ObjectVT, NumIntRegs, NumXMMRegs,
                            ObjSize, ObjIntRegs, ObjXMMRegs);
    if (ObjSize > 4)
Evan Cheng's avatar
Evan Cheng committed
    unsigned Reg = 0;
    SDOperand ArgValue;
    if (ObjIntRegs || ObjXMMRegs) {
      switch (ObjectVT) {
      default: assert(0 && "Unhandled argument type!");
      case MVT::i8:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
                        X86::GR8RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i8);
        break;
      case MVT::i16:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
                        X86::GR16RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i16);
        break;
      case MVT::i32:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                        X86::GR32RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
        break;
      case MVT::i64:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                        X86::GR32RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
        if (ObjIntRegs == 2) {
          Reg = AddLiveIn(MF, X86::EDX, X86::GR32RegisterClass);
          SDOperand ArgValue2 = DAG.getCopyFromReg(Root, Reg, MVT::i32);
          ArgValue= DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
        break;
      NumIntRegs += ObjIntRegs;
      NumXMMRegs += ObjXMMRegs;
    }
      // XMM arguments have to be aligned on 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      if (ObjectVT == MVT::i64 && ObjIntRegs) {
        SDOperand ArgValue2 = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
        ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
      } else
        ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0);
      ArgOffset += ArgIncrement;   // Move on to the next argument.
  // Make sure the instruction takes 8n+4 bytes to make sure the start of the
  // arguments and the arguments after the retaddr has been pushed are aligned.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;
  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
Evan Cheng's avatar
Evan Cheng committed
  RegSaveFrameIndex = 0xAAAAAAA;   // X86-64 only.
  ReturnAddrIndex = 0;             // No return address slot generated yet.
  BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  // Finally, inform the code generator which regs we return values in.
  switch (getValueType(MF.getFunction()->getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(X86::EAX);
    break;
  case MVT::i64:
    MF.addLiveOut(X86::EAX);
    MF.addLiveOut(X86::EDX);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(X86::ST0);
    break;
Evan Cheng's avatar
Evan Cheng committed
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
  // Return the new list of results.
  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
                                     Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size());
SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                               bool isFastCall) {
  SDOperand Chain     = Op.getOperand(0);
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);
  MVT::ValueType RetVT= Op.Val->getValueType(0);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;

  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  // Keep track of the number of integer regs passed so far.  This can be either
  // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
  // used).
  unsigned NumIntRegs = 0;
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.

  static const unsigned GPRArgRegs[][2] = {
    { X86::AL,  X86::DL },
    { X86::AX,  X86::DX },
    { X86::EAX, X86::EDX }
  };
Reid Spencer's avatar
Reid Spencer committed
#if 0
  static const unsigned FastCallGPRArgRegs[][2] = {
    { X86::CL,  X86::DL },
    { X86::CX,  X86::DX },
    { X86::ECX, X86::EDX }
  };  
Reid Spencer's avatar
Reid Spencer committed
#endif
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);
    default: assert(0 && "Unknown value type!");
    case MVT::i8:
    case MVT::i16:
Nick Lewycky's avatar
Nick Lewycky committed
    case MVT::i32: {
     unsigned MaxNumIntRegs = (isFastCall ? 2 : FASTCC_NUM_INT_ARGS_INREGS);
     if (NumIntRegs < MaxNumIntRegs) {
       ++NumIntRegs;
       break;
     }
Nick Lewycky's avatar
Nick Lewycky committed
     } // Fall through
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::f64:
      NumBytes += 8;
      break;
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
Evan Cheng's avatar
Evan Cheng committed
    case MVT::v2f64:
     if (isFastCall) {
      assert(0 && "Unknown value type!");
     } else {
       if (NumXMMRegs < 4)
         NumXMMRegs++;
       else {
         // XMM arguments have to be aligned on 16-byte boundary.
         NumBytes = ((NumBytes + 15) / 16) * 16;
         NumBytes += 16;
       }
     }
     break;

  // Make sure the instruction takes 8n+4 bytes to make sure the start of the
  // arguments and the arguments after the retaddr has been pushed are aligned.
  if ((NumBytes & 7) == 0)
    NumBytes += 4;

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  NumIntRegs = 0;
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
Evan Cheng's avatar
Evan Cheng committed
  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
Nick Lewycky's avatar
Nick Lewycky committed
    case MVT::i32: {
     unsigned MaxNumIntRegs = (isFastCall ? 2 : FASTCC_NUM_INT_ARGS_INREGS);
     if (NumIntRegs < MaxNumIntRegs) {
       RegsToPass.push_back(
         std::make_pair(GPRArgRegs[Arg.getValueType()-MVT::i8][NumIntRegs],
                        Arg));
       ++NumIntRegs;
       break;
     }
Nick Lewycky's avatar
Nick Lewycky committed
     } // Fall through
    case MVT::f32: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
Evan Cheng's avatar
Evan Cheng committed
    case MVT::v2f64:
     if (isFastCall) {
       assert(0 && "Unexpected ValueType for argument!");
     } else {
       if (NumXMMRegs < 4) {
         RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
         NumXMMRegs++;
       } else {
         // XMM arguments have to be aligned on 16-byte boundary.
         ArgOffset = ((ArgOffset + 15) / 16) * 16;
         SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
         PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
Evan Cheng's avatar
Evan Cheng committed
    InFlag = Chain.getValue(1);
  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use extra load for direct calls to dllimported functions
    if (!((Subtarget->isTargetCygwin() || Subtarget->isTargetWindows()) &&
          WindowsGVRequiresExtraLoad(G->getGlobal())))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first, 
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
Evan Cheng's avatar
Evan Cheng committed

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
  InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  if (RetVT != MVT::Other)
    NodeTys.push_back(MVT::Flag);  // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  if (RetVT != MVT::Other)
    InFlag = Chain.getValue(1);
  std::vector<SDOperand> ResultVals;
  NodeTys.clear();
  switch (RetVT) {
  default: assert(0 && "Unknown value type to return!");
  case MVT::Other: break;
  case MVT::i8:
    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i8);
    break;
  case MVT::i16:
    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i16);
    break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
Evan Cheng's avatar
Evan Cheng committed
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
   if (isFastCall) {
     assert(0 && "Unknown value type to return!");
   } else {
     Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
     ResultVals.push_back(Chain.getValue(0));
     NodeTys.push_back(RetVT);
   }
   break;
  case MVT::f32:
  case MVT::f64: {
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::f64);
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(InFlag);
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys,
                                   &Ops[0], Ops.size());
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);
    if (X86ScalarSSE) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      Tys.clear();
      Tys.push_back(MVT::Other);
      Ops.push_back(Chain);
      Ops.push_back(RetVal);
      Ops.push_back(StackSlot);
      Ops.push_back(DAG.getValueType(RetVT));
      Ops.push_back(InFlag);
      Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
      RetVal = DAG.getLoad(RetVT, Chain, StackSlot, NULL, 0);

    if (RetVT == MVT::f32 && !X86ScalarSSE)
      // FIXME: we would really like to remember that this FP_ROUND
      // operation is okay to eliminate if we allow excess FP precision.
      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
    ResultVals.push_back(RetVal);
    NodeTys.push_back(RetVT);
    break;
  }
Evan Cheng's avatar
Evan Cheng committed
  }

  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;
  
  // Otherwise, merge everything together with a MERGE_VALUES node.
  NodeTys.push_back(MVT::Other);
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              &ResultVals[0], ResultVals.size());
//===----------------------------------------------------------------------===//
//                  StdCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//  The StdCall calling convention is the standard for many Windows API
//  routines. It differs from the C calling convention only slightly: the
//  callee cleans up the stack, not the caller. Symbols are also decorated
//  in a special way. It doesn't support any vector arguments.

/// HowToPassStdCallCCArgument - Reports how a formal argument of the given
/// value type is passed under the stdcall convention: always on the stack.
/// The size in bytes of the argument's stack slot is written to ObjSize.
static void
HowToPassStdCallCCArgument(MVT::ValueType ObjectVT, unsigned &ObjSize) {
  switch (ObjectVT) {
  case MVT::i16:
    ObjSize = 2;
    break;
  case MVT::i32:
  case MVT::f32:
    ObjSize = 4;
    break;
  case MVT::i64:
  case MVT::f64:
    ObjSize = 8;
    break;
  default:
    assert(0 && "Unhandled argument type!");
    // Falls through: when asserts are compiled out, an unhandled type is
    // sized like an i8.
  case MVT::i8:
    ObjSize = 1;
    break;
  }
}

/// LowerStdCallCCArguments - Lower the incoming formal arguments of a stdcall
/// function. Every argument is loaded from a fixed stack object; the result is
/// a MERGE_VALUES node of the loaded values followed by the incoming chain.
/// Also records how many argument bytes the callee pops versus how many the
/// caller reserves, depending on whether the function is vararg.
SDOperand X86TargetLowering::LowerStdCallCCArguments(SDOperand Op,
                                                     SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
  SDOperand InChain = Op.getOperand(0);
  // The last value produced by the node is the chain, not an argument.
  const unsigned ArgCount = Op.Val->getNumValues() - 1;
  std::vector<SDOperand> Results;

  // Stack layout on entry:
  //
  //   [ESP]     -- return address
  //   [ESP + 4] -- first argument (leftmost lexically)
  //   [ESP + 8] -- second argument, if the first one is <= 4 bytes in size
  //   ...
  //
  // Offsets start at 0 because the frame mechanisms account for the return
  // address slot.
  unsigned StackOffset = 0;
  for (unsigned ArgNo = 0; ArgNo < ArgCount; ++ArgNo) {
    unsigned SlotSize = 0;
    HowToPassStdCallCCArgument(Op.getValue(ArgNo).getValueType(), SlotSize);

    // Create the fixed frame object for this parameter and load from it.
    int Slot = FrameInfo->CreateFixedObject(SlotSize, StackOffset);
    SDOperand SlotAddr = DAG.getFrameIndex(Slot, getPointerTy());
    Results.push_back(
      DAG.getLoad(Op.Val->getValueType(ArgNo), InChain, SlotAddr, NULL, 0));

    // Each argument advances the offset by at least 4 bytes; larger objects
    // advance it by their full size.
    StackOffset += (SlotSize > 4) ? SlotSize : 4;
  }

  Results.push_back(InChain);

  // A vararg function leaves stack cleanup to the caller and needs a frame
  // index marking where the first variadic value starts (used when expanding
  // llvm.va_start). Otherwise the callee pops every argument byte.
  const bool TakesVarArgs =
    cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  BytesToPopOnReturn  = TakesVarArgs ? 0 : StackOffset;
  BytesCallerReserves = TakesVarArgs ? StackOffset : 0;
  if (TakesVarArgs)
    VarArgsFrameIndex = FrameInfo->CreateFixedObject(1, StackOffset);

  RegSaveFrameIndex = 0xAAAAAAA;    // X86-64 only.
  ReturnAddrIndex = 0;              // No return address slot generated yet.

  MF.getInfo<X86FunctionInfo>()->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Hand back one result per value type of the original node.
  std::vector<MVT::ValueType> ResultTys(Op.Val->value_begin(),
                                        Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, ResultTys, &Results[0],
                     Results.size());
}


SDOperand X86TargetLowering::LowerStdCallCCCallTo(SDOperand Op,
                                                  SelectionDAG &DAG) {
  SDOperand Chain     = Op.getOperand(0);
  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);
  MVT::ValueType RetVT= Op.Val->getValueType(0);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;  
  
  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::i64:
    case MVT::f64:
      NumBytes += 8;
      break;
    }
  }
  
  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  std::vector<SDOperand> MemOpChains;
  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16: {
      // Promote the integer to 32 bits.  If the input type is signed use a
      // sign extend, otherwise use a zero extend.
      unsigned ExtOp =
        dyn_cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue() ?
        ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, MVT::i32, Arg);
    }
    // Fallthrough

    case MVT::i32:
    case MVT::f32: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
      ArgOffset += 4;
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
      ArgOffset += 8;
      break;
    }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use extra load for direct calls to dllimported functions
    if (!((Subtarget->isTargetCygwin() || Subtarget->isTargetWindows()) &&
          WindowsGVRequiresExtraLoad(G->getGlobal())))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain