X86ISelLowering.cpp

//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ParameterAttributes.h"
using namespace llvm;

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSEf64 = Subtarget->hasSSE2();
  X86ScalarSSEf32 = Subtarget->hasSSE1();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
  
  bool Fast = false;

  RegInfo = TM.getRegisterInfo();

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }
  
  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // We don't accept any truncstore of integer registers.  
  setTruncStoreAction(MVT::i64, MVT::i32, Expand);
  setTruncStoreAction(MVT::i64, MVT::i16, Expand);
  setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
  setTruncStoreAction(MVT::i32, MVT::i16, Expand);
  setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
  setTruncStoreAction(MVT::i16, MVT::i8, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Expand);
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSEf64)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Expand);
    else
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSEf32) {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
    // f32 and f64 cases are Legal, f80 case is not
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  // In 32-bit mode these are custom lowered.  In 64-bit mode F32 and F64
  // are Legal, f80 is custom lowered.
  setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
  setOperationAction(ISD::SINT_TO_FP     , MVT::i64  , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSEf32) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
    // f32 and f64 cases are Legal, f80 case is not
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Promote);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSEf64) {
    setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
    setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);
  }

  // Scalar integer multiply, multiply-high, divide, and remainder are
  // lowered to use operations that produce two results, to match the
  // available instructions. This exposes the two-result form to trivial
  // CSE, which is able to combine x/y and x%y into a single instruction,
  // for example. The single-result multiply instructions are introduced
  // in X86ISelDAGToDAG.cpp, after CSE, for uses where the the high part
  // is not needed.
  setOperationAction(ISD::MUL             , MVT::i8    , Expand);
  setOperationAction(ISD::MULHS           , MVT::i8    , Expand);
  setOperationAction(ISD::MULHU           , MVT::i8    , Expand);
  setOperationAction(ISD::SDIV            , MVT::i8    , Expand);
  setOperationAction(ISD::UDIV            , MVT::i8    , Expand);
  setOperationAction(ISD::SREM            , MVT::i8    , Expand);
  setOperationAction(ISD::UREM            , MVT::i8    , Expand);
  setOperationAction(ISD::MUL             , MVT::i16   , Expand);
  setOperationAction(ISD::MULHS           , MVT::i16   , Expand);
  setOperationAction(ISD::MULHU           , MVT::i16   , Expand);
  setOperationAction(ISD::SDIV            , MVT::i16   , Expand);
  setOperationAction(ISD::UDIV            , MVT::i16   , Expand);
  setOperationAction(ISD::SREM            , MVT::i16   , Expand);
  setOperationAction(ISD::UREM            , MVT::i16   , Expand);
  setOperationAction(ISD::MUL             , MVT::i32   , Expand);
  setOperationAction(ISD::MULHS           , MVT::i32   , Expand);
  setOperationAction(ISD::MULHU           , MVT::i32   , Expand);
  setOperationAction(ISD::SDIV            , MVT::i32   , Expand);
  setOperationAction(ISD::UDIV            , MVT::i32   , Expand);
  setOperationAction(ISD::SREM            , MVT::i32   , Expand);
  setOperationAction(ISD::UREM            , MVT::i32   , Expand);
  setOperationAction(ISD::MUL             , MVT::i64   , Expand);
  setOperationAction(ISD::MULHS           , MVT::i64   , Expand);
  setOperationAction(ISD::MULHU           , MVT::i64   , Expand);
  setOperationAction(ISD::SDIV            , MVT::i64   , Expand);
  setOperationAction(ISD::UDIV            , MVT::i64   , Expand);
  setOperationAction(ISD::SREM            , MVT::i64   , Expand);
  setOperationAction(ISD::UREM            , MVT::i64   , Expand);

  setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);
  setOperationAction(ISD::FLT_ROUNDS_      , MVT::i32  , Custom);
  
  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Custom);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Custom);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Custom);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Custom);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Custom);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP          , MVT::i64  , Expand);
    setOperationAction(ISD::CTTZ           , MVT::i64  , Custom);
    setOperationAction(ISD::CTLZ           , MVT::i64  , Custom);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f80  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f80  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT        , MVT::i64  , Custom);
    setOperationAction(ISD::SETCC         , MVT::i64  , Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET             , MVT::Other, Custom);
  if (!Subtarget->is64Bit())
    setOperationAction(ISD::EH_RETURN       , MVT::Other, Custom);

  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
  setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool  , MVT::i64  , Custom);
    setOperationAction(ISD::JumpTable     , MVT::i64  , Custom);
    setOperationAction(ISD::GlobalAddress , MVT::i64  , Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64  , Custom);
  }
  // 64-bit addm sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET          , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY          , MVT::Other, Custom);

  // Use the default ISD::LOCATION, ISD::DECLARE expansion.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing())
    setOperationAction(ISD::LABEL, MVT::Other, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    // FIXME: Verify
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
  
  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::VACOPY          , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY          , MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  if (Subtarget->isTargetCygMing())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  if (X86ScalarSSEf64) {
    // f32 and f64 use SSE.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0)); // xorpd
    addLegalFPImmediate(APFloat(+0.0f)); // xorps

    // Floating truncations from f80 and extensions to f80 go through memory.
    // If optimizing, we lie about this though and handle it in
    // InstructionSelectPreprocess so that dagcombine2 can hack on these.
    if (Fast) {
      setConvertAction(MVT::f32, MVT::f80, Expand);
      setConvertAction(MVT::f64, MVT::f80, Expand);
      setConvertAction(MVT::f80, MVT::f32, Expand);
      setConvertAction(MVT::f80, MVT::f64, Expand);
    }
  } else if (X86ScalarSSEf32) {
    // Use SSE for f32, x87 for f64.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);

    // Use ANDPS to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    setOperationAction(ISD::UNDEF,     MVT::f64, Expand);

    // Use ANDPS and ORPS to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
    addLegalFPImmediate(APFloat(+0.0)); // FLD0
    addLegalFPImmediate(APFloat(+1.0)); // FLD1
    addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS

    // SSE <-> X87 conversions go through memory.  If optimizing, we lie about
    // this though and handle it in InstructionSelectPreprocess so that
    // dagcombine2 can hack on these.
    if (Fast) {
      setConvertAction(MVT::f32, MVT::f64, Expand);
      setConvertAction(MVT::f32, MVT::f80, Expand);
      setConvertAction(MVT::f80, MVT::f32, Expand);    
      setConvertAction(MVT::f64, MVT::f32, Expand);
      // And x87->x87 truncations also.
      setConvertAction(MVT::f80, MVT::f64, Expand);
    }

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
      setOperationAction(ISD::FCOS           , MVT::f64  , Expand);
    }
  } else {
    // f32 and f64 in x87.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
    addRegisterClass(MVT::f32, X86::RFP32RegisterClass);

    setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
    setOperationAction(ISD::UNDEF,     MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    // Floating truncations go through memory.  If optimizing, we lie about
    // this though and handle it in InstructionSelectPreprocess so that
    // dagcombine2 can hack on these.
    if (Fast) {
      setConvertAction(MVT::f80, MVT::f32, Expand);    
      setConvertAction(MVT::f64, MVT::f32, Expand);
      setConvertAction(MVT::f80, MVT::f64, Expand);
    }

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
      setOperationAction(ISD::FCOS           , MVT::f64  , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(APFloat(+0.0)); // FLD0
    addLegalFPImmediate(APFloat(+1.0)); // FLD1
    addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
    addLegalFPImmediate(APFloat(+0.0f)); // FLD0
    addLegalFPImmediate(APFloat(+1.0f)); // FLD1
    addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
  }

  // Long double always uses X87.
  addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
  setOperationAction(ISD::UNDEF,     MVT::f80, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
  {
    setOperationAction(ISD::ConstantFP, MVT::f80, Expand);
    APFloat TmpFlt(+0.0);
    TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven);
    addLegalFPImmediate(TmpFlt);  // FLD0
    TmpFlt.changeSign();
    addLegalFPImmediate(TmpFlt);  // FLD0/FCHS
    APFloat TmpFlt2(+1.0);
    TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven);
    addLegalFPImmediate(TmpFlt2);  // FLD1
    TmpFlt2.changeSign();
    addLegalFPImmediate(TmpFlt2);  // FLD1/FCHS
  }
    
  if (!UnsafeFPMath) {
    setOperationAction(ISD::FSIN           , MVT::f80  , Expand);
    setOperationAction(ISD::FCOS           , MVT::f80  , Expand);
  }

  // Always use a library call for pow.
  setOperationAction(ISD::FPOW             , MVT::f32  , Expand);
  setOperationAction(ISD::FPOW             , MVT::f64  , Expand);
  setOperationAction(ISD::FPOW             , MVT::f80  , Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FABS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSIN, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOS, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FPOWI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSQRT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIVREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIVREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FPOW, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTPOP, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTTZ, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::CTLZ, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SHL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SRA, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SRL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::ROTL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::ROTR, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::BSWAP, (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics

    setOperationAction(ISD::ADD,                MVT::v8i8,  Legal);
    setOperationAction(ISD::ADD,                MVT::v4i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v2i32, Legal);
    setOperationAction(ISD::ADD,                MVT::v1i64, Legal);

    setOperationAction(ISD::SUB,                MVT::v8i8,  Legal);
    setOperationAction(ISD::SUB,                MVT::v4i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v2i32, Legal);
    setOperationAction(ISD::SUB,                MVT::v1i64, Legal);

    setOperationAction(ISD::MULHS,              MVT::v4i16, Legal);
    setOperationAction(ISD::MUL,                MVT::v4i16, Legal);

    setOperationAction(ISD::AND,                MVT::v8i8,  Promote);
    AddPromotedToType (ISD::AND,                MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::AND,                MVT::v4i16, Promote);
    AddPromotedToType (ISD::AND,                MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::AND,                MVT::v2i32, Promote);
    AddPromotedToType (ISD::AND,                MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::AND,                MVT::v1i64, Legal);

    setOperationAction(ISD::OR,                 MVT::v8i8,  Promote);
    AddPromotedToType (ISD::OR,                 MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::OR,                 MVT::v4i16, Promote);
    AddPromotedToType (ISD::OR,                 MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::OR,                 MVT::v2i32, Promote);
    AddPromotedToType (ISD::OR,                 MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::OR,                 MVT::v1i64, Legal);

    setOperationAction(ISD::XOR,                MVT::v8i8,  Promote);
    AddPromotedToType (ISD::XOR,                MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::XOR,                MVT::v4i16, Promote);
    AddPromotedToType (ISD::XOR,                MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::XOR,                MVT::v2i32, Promote);
    AddPromotedToType (ISD::XOR,                MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::XOR,                MVT::v1i64, Legal);

    setOperationAction(ISD::LOAD,               MVT::v8i8,  Promote);
    AddPromotedToType (ISD::LOAD,               MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::LOAD,               MVT::v4i16, Promote);
    AddPromotedToType (ISD::LOAD,               MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::LOAD,               MVT::v2i32, Promote);
    AddPromotedToType (ISD::LOAD,               MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::LOAD,               MVT::v1i64, Legal);

    setOperationAction(ISD::BUILD_VECTOR,       MVT::v8i8,  Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v1i64, Custom);

    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v8i8,  Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v1i64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i8,  Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v4i16, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v2i32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v1i64, Custom);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB,               MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL,               MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT,              MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG,               MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
    setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
    setOperationAction(ISD::ADD,                MVT::v2i64, Legal);
    setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
    setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
    setOperationAction(ISD::SUB,                MVT::v2i64, Legal);
    setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
    setOperationAction(ISD::FADD,               MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB,               MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL,               MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV,               MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT,              MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG,               MVT::v2f64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      // Do not attempt to custom lower non-power-of-2 vectors
      if (!isPowerOf2_32(MVT::getVectorNumElements(VT)))
        continue;
      setOperationAction(ISD::BUILD_VECTOR,        (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,      (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::SELECT);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
  if (MaxAlign == 16)
    return;
  if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (VTy->getBitWidth() == 128)
      MaxAlign = 16;
  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      unsigned EltAlign = 0;
      getMaxByValAlign(STy->getElementType(i), EltAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == 16)
        break;
    }
  }
  return;
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
  if (Subtarget->is64Bit())
    return getTargetData()->getABITypeAlignment(Ty);
  unsigned Align = 4;
  if (Subtarget->hasSSE1())
    getMaxByValAlign(Ty, Align);
  return Align;
}

/// getPICJumpTableRelocaBase - Returns relocation base for the given PIC
/// jumptable.
SDOperand X86TargetLowering::getPICJumpTableRelocBase(SDOperand Table,
                                                      SelectionDAG &DAG) const {
  if (usesGlobalOffsetTable())
    return DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, getPointerTy());
  if (!Subtarget->isPICStyleRIPRel())
    return DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy());
  return Table;
}

//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "X86GenCallingConv.inc"

/// GetPossiblePreceedingTailCall - Get preceeding X86ISD::TAILCALL node if it
/// exists skip possible ISD:TokenFactor.
static SDOperand GetPossiblePreceedingTailCall(SDOperand Chain) {
  if (Chain.getOpcode() == X86ISD::TAILCALL) {
    return Chain;
  } else if (Chain.getOpcode() == ISD::TokenFactor) {
    if (Chain.getNumOperands() &&
        Chain.getOperand(0).getOpcode() == X86ISD::TAILCALL)
      return Chain.getOperand(0);
  }
  return Chain;
}

/// LowerRET - Lower an ISD::RET node.
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
  
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);
    
  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }
  SDOperand Chain = Op.getOperand(0);
  
  // Handle tail call return.
  Chain = GetPossiblePreceedingTailCall(Chain);
  if (Chain.getOpcode() == X86ISD::TAILCALL) {
    SDOperand TailCall = Chain;
    SDOperand TargetAddress = TailCall.getOperand(1);
    SDOperand StackAdjustment = TailCall.getOperand(2);
    assert(((TargetAddress.getOpcode() == ISD::Register &&
               (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::ECX ||
                cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
              TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
              TargetAddress.getOpcode() == ISD::TargetGlobalAddress) && 
             "Expecting an global address, external symbol, or register");
    assert(StackAdjustment.getOpcode() == ISD::Constant &&
           "Expecting a const value");

    SmallVector<SDOperand,8> Operands;
    Operands.push_back(Chain.getOperand(0));
    Operands.push_back(TargetAddress);
    Operands.push_back(StackAdjustment);
    // Copy registers used by the call. Last operand is a flag so it is not
    // copied.
    for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
      Operands.push_back(Chain.getOperand(i));
    }
    return DAG.getNode(X86ISD::TC_RETURN, MVT::Other, &Operands[0], 
                       Operands.size());
  }
  
  // Regular return.
  SDOperand Flag;

  // Copy the result values into the output registers.
  if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
      RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      CCValAssign &VA = RVLocs[i];
      assert(VA.isRegLoc() && "Can only return in registers!");
      Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
                               Flag);
      Flag = Chain.getValue(1);
    }
  } else {
    // We need to handle a destination of ST0 specially, because it isn't really
    // a register.
    SDOperand Value = Op.getOperand(1);
    
    // an XMM register onto the fp-stack.  Do this with an FP_EXTEND to f80.
    // This will get legalized into a load/store if it can't get optimized away.
    if (isScalarFPTypeInSSEReg(RVLocs[0].getValVT()))
      Value = DAG.getNode(ISD::FP_EXTEND, MVT::f80, Value);
    
    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDOperand Ops[] = { Chain, Value };
    Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
    Flag = Chain.getValue(1);
  }
  
  SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
  if (Flag.Val)
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
  else
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
}


/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers.  This assumes that
/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
/// being lowered.  The returns a SDNode with the same number of values as the
/// ISD::CALL.
SDNode *X86TargetLowering::
LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall, 
                unsigned CallingConv, SelectionDAG &DAG) {
  
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  bool isVarArg = cast<ConstantSDNode>(TheCall->getOperand(2))->getValue() != 0;
  CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);

  SmallVector<SDOperand, 8> ResultVals;
  
  // Copy all of the result registers out of their specified physreg.
  if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
                                 RVLocs[i].getValVT(), InFlag).getValue(1);
      InFlag = Chain.getValue(2);
      ResultVals.push_back(Chain.getValue(0));
    }
  } else {
    // Copies from the FP stack are special, as ST0 isn't a valid register
    // before the fp stackifier runs.
    
    // Copy ST0 into an RFP register with FP_GET_RESULT.  If this will end up
    // in an SSE register, copy it out as F80 and do a truncate, otherwise use
    // the specified value type.
    MVT::ValueType GetResultTy = RVLocs[0].getValVT();
    if (isScalarFPTypeInSSEReg(GetResultTy))
      GetResultTy = MVT::f80;
    SDVTList Tys = DAG.getVTList(GetResultTy, MVT::Other, MVT::Flag);
    
    SDOperand GROps[] = { Chain, InFlag };
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);

    // If we want the result in an SSE register, use an FP_TRUNCATE to get it
    // there.
    if (GetResultTy != RVLocs[0].getValVT())
      RetVal = DAG.getNode(ISD::FP_ROUND, RVLocs[0].getValVT(), RetVal,
                           // This truncation won't change the value.
                           DAG.getIntPtrConstant(1));
    
    ResultVals.push_back(RetVal);
  }
  
  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
                     &ResultVals[0], ResultVals.size()).Val;
}

/// LowerCallResultToTwo64BitRegs - Lower the result values of an x86-64
/// ISD::CALL where the results are known to be in two 64-bit registers,
/// e.g. XMM0 and XMM1. This simplify store the two values back to the
/// fixed stack slot allocated for StructRet.
SDNode *X86TargetLowering::
LowerCallResultToTwo64BitRegs(SDOperand Chain, SDOperand InFlag,
                              SDNode *TheCall, unsigned Reg1, unsigned Reg2,
                              MVT::ValueType VT, SelectionDAG &DAG) {
  SDOperand RetVal1 = DAG.getCopyFromReg(Chain, Reg1, VT, InFlag);
  Chain = RetVal1.getValue(1);
  InFlag = RetVal1.getValue(2);
  SDOperand RetVal2 = DAG.getCopyFromReg(Chain, Reg2, VT, InFlag);
  Chain = RetVal2.getValue(1);
  InFlag = RetVal2.getValue(2);
  SDOperand FIN = TheCall->getOperand(5);
  Chain = DAG.getStore(Chain, RetVal1, FIN, NULL, 0);
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(8));
  Chain = DAG.getStore(Chain, RetVal2, FIN, NULL, 0);
  return Chain.Val;
}

/// LowerCallResultToTwoX87Regs - Lower the result values of an x86-64 ISD::CALL
/// where the results are known to be in ST0 and ST1.
SDNode *X86TargetLowering::
LowerCallResultToTwoX87Regs(SDOperand Chain, SDOperand InFlag,
                            SDNode *TheCall, SelectionDAG &DAG) {
  SmallVector<SDOperand, 8> ResultVals;
  const MVT::ValueType VTs[] = { MVT::f80, MVT::f80, MVT::Other, MVT::Flag };
  SDVTList Tys = DAG.getVTList(VTs, 4);
  SDOperand Ops[] = { Chain, InFlag };
  SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT2, Tys, Ops, 2);
  Chain = RetVal.getValue(2);
  SDOperand FIN = TheCall->getOperand(5);
  Chain = DAG.getStore(Chain, RetVal.getValue(1), FIN, NULL, 0);
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN, DAG.getIntPtrConstant(16));
  Chain = DAG.getStore(Chain, RetVal, FIN, NULL, 0);
  return Chain.Val;
}

//===----------------------------------------------------------------------===//
//                C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//  StdCall calling convention seems to be standard for many Windows' API
//  routines and around. It differs from C calling convention just a little:
//  callee should clean up the stack, not caller. Symbols should be also
//  decorated in some fancy way :) It doesn't support any vector arguments.
//  For info on fast calling convention see Fast Calling Convention (tail call)
//  implementation LowerX86_32FastCCCallTo.

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value.  It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          const TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);
  MF.getRegInfo().addLiveIn(PReg, VReg);
  return VReg;
}

// Determines whether a CALL node uses struct return semantics.
static bool CallIsStructReturn(SDOperand Op) {
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;
  if (!NumOps)
    return false;
  
  ConstantSDNode *Flags = cast<ConstantSDNode>(Op.getOperand(6));
  return Flags->getValue() & ISD::ParamFlags::StructReturn;
}

// Determines whether a FORMAL_ARGUMENTS node uses struct return semantics.
static bool ArgsAreStructReturn(SDOperand Op) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  if (!NumArgs)
    return false;
  
  ConstantSDNode *Flags = cast<ConstantSDNode>(Op.getOperand(3));
  return Flags->getValue() & ISD::ParamFlags::StructReturn;
}

// Determines whether a CALL or FORMAL_ARGUMENTS node requires the callee to pop
// its own arguments. Callee pop is necessary to support tail calls.
bool X86TargetLowering::IsCalleePop(SDOperand Op) {
  bool IsVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  if (IsVarArg)
    return false;

  switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {
  default:
    return false;
  case CallingConv::X86_StdCall: