X86ISelLowering.cpp

//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "Utils/X86ShuffleDecode.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalAlias.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace dwarf;

STATISTIC(NumTailCalls, "Number of tail calls");

// Forward declarations.
static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
                       SDValue V2);

static SDValue Insert128BitVector(SDValue Result,
                                  SDValue Vec,
                                  SDValue Idx,
                                  SelectionDAG &DAG,
                                  DebugLoc dl);

static SDValue Extract128BitVector(SDValue Vec,
                                   SDValue Idx,
                                   SelectionDAG &DAG,
                                   DebugLoc dl);

static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG);


/// Generate a DAG to grab 128-bits from a vector > 128 bits.  This
/// sets things up to match to an AVX VEXTRACTF128 instruction or a
/// simple subregister reference.  Idx is an index in the 128 bits we
/// want.  It need not be aligned to a 128-bit bounday.  That makes
/// lowering EXTRACT_VECTOR_ELT operations easier.
static SDValue Extract128BitVector(SDValue Vec,
                                   SDValue Idx,
                                   SelectionDAG &DAG,
                                   DebugLoc dl) {
  EVT VT = Vec.getValueType();
  assert(VT.getSizeInBits() == 256 && "Unexpected vector size!");

  EVT ElVT = VT.getVectorElementType();

  int Factor = VT.getSizeInBits() / 128;

  EVT ResultVT = EVT::getVectorVT(*DAG.getContext(),
                                  ElVT,
                                  VT.getVectorNumElements() / Factor);

  // Extract from UNDEF is UNDEF.
  if (Vec.getOpcode() == ISD::UNDEF)
    return DAG.getNode(ISD::UNDEF, dl, ResultVT);

  if (isa<ConstantSDNode>(Idx)) {
    unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();

    // Extract the relevant 128 bits.  Generate an EXTRACT_SUBVECTOR
    // we can match to VEXTRACTF128.
    unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();

    // This is the index of the first element of the 128-bit chunk
    // we want.
    unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
                                 * ElemsPerChunk);

    SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);

    SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
                                 VecIdx);

    return Result;
  }

  return SDValue();
}

/// Generate a DAG to put 128-bits into a vector > 128 bits.  This
/// sets things up to match to an AVX VINSERTF128 instruction or a
/// simple superregister reference.  Idx is an index in the 128 bits
/// we want.  It need not be aligned to a 128-bit bounday.  That makes
/// lowering INSERT_VECTOR_ELT operations easier.
static SDValue Insert128BitVector(SDValue Result,
                                  SDValue Vec,
                                  SDValue Idx,
                                  SelectionDAG &DAG,
                                  DebugLoc dl) {
  if (isa<ConstantSDNode>(Idx)) {
    EVT VT = Vec.getValueType();
    assert(VT.getSizeInBits() == 128 && "Unexpected vector size!");

    EVT ElVT = VT.getVectorElementType();

    unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();

    EVT ResultVT = Result.getValueType();

    // Insert the relevant 128 bits.
    unsigned ElemsPerChunk = 128 / ElVT.getSizeInBits();

    // This is the index of the first element of the 128-bit chunk
    // we want.
    unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / 128)
                                 * ElemsPerChunk);

    SDValue VecIdx = DAG.getConstant(NormalizedIdxVal, MVT::i32);

    Result = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec,
                         VecIdx);
    return Result;
  }

  return SDValue();
}

/// Given two vectors, concat them.
static SDValue ConcatVectors(SDValue Lower, SDValue Upper, SelectionDAG &DAG) {
  DebugLoc dl = Lower.getDebugLoc();

  assert(Lower.getValueType() == Upper.getValueType() && "Mismatched vectors!");

  EVT VT = EVT::getVectorVT(*DAG.getContext(),
                            Lower.getValueType().getVectorElementType(),
                            Lower.getValueType().getVectorNumElements() * 2);

  // TODO: Generalize to arbitrary vector length (this assumes 256-bit vectors).
  assert(VT.getSizeInBits() == 256 && "Unsupported vector concat!");

  // Insert the upper subvector.
  SDValue Vec = Insert128BitVector(DAG.getNode(ISD::UNDEF, dl, VT), Upper,
                                   DAG.getConstant(
                                     // This is half the length of the result
                                     // vector.  Start inserting the upper 128
                                     // bits here.
                                     Lower.getValueType().getVectorNumElements(),
                                     MVT::i32),
                                   DAG, dl);

  // Insert the lower subvector.
  Vec = Insert128BitVector(Vec, Lower, DAG.getConstant(0, MVT::i32), DAG, dl);
  return Vec;
}

static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
  const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
  bool is64Bit = Subtarget->is64Bit();

  if (Subtarget->isTargetEnvMacho()) {
    if (is64Bit)
      return new X8664_MachoTargetObjectFile();
    return new TargetLoweringObjectFileMachO();
  }

  if (Subtarget->isTargetELF()) {
    if (is64Bit)
      return new X8664_ELFTargetObjectFile(TM);
    return new X8632_ELFTargetObjectFile(TM);
  }
  if (Subtarget->isTargetCOFF() && !Subtarget->isTargetEnvMacho())
    return new TargetLoweringObjectFileCOFF();
  llvm_unreachable("unknown subtarget type");
}

X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
  : TargetLowering(TM, createTLOF(TM)) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSEf64 = Subtarget->hasXMMInt();
  X86ScalarSSEf32 = Subtarget->hasXMM();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  RegInfo = TM.getRegisterInfo();
  TD = getTargetData();

  // Set up the TargetLowering object.
  static MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setBooleanContents(ZeroOrOneBooleanContent);
    
  // For 64-bit since we have so many registers use the ILP scheduler, for
  // 32-bit code use the register pressure specific scheduling.
  if (Subtarget->is64Bit())
    setSchedulingPreference(Sched::ILP);
  else
    setSchedulingPreference(Sched::RegPressure);
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
    // Setup Windows compiler runtime calls.
    setLibcallName(RTLIB::SDIV_I64, "_alldiv");
    setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
    setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2");
    setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2");
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C);
    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C);
  }

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // We don't accept any truncstore of integer registers.
  setTruncStoreAction(MVT::i64, MVT::i32, Expand);
  setTruncStoreAction(MVT::i64, MVT::i16, Expand);
  setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
  setTruncStoreAction(MVT::i32, MVT::i16, Expand);
  setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
  setTruncStoreAction(MVT::i16, MVT::i8,  Expand);

  // SETOEQ and SETUNE require checking two conditions.
  setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Expand);
  } else if (!UseSoftFloat) {
    // We have an algorithm for SSE2->double, and we turn this into a
    // 64-bit FILD followed by conditional FADD for other targets.
    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Custom);
    // We have an algorithm for SSE2, and we turn this into a 64-bit
    // FILD for other targets.
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Custom);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);

  if (!UseSoftFloat) {
    // SSE has no i16 to fp conversion, only i32
    if (X86ScalarSSEf32) {
      setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
      // f32 and f64 cases are Legal, f80 case is not
      setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
    } else {
      setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
      setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
    }
  } else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Promote);
  }

  // In 32-bit mode these are custom lowered.  In 64-bit mode F32 and F64
  // are Legal, f80 is custom lowered.
  setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
  setOperationAction(ISD::SINT_TO_FP     , MVT::i64  , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSEf32) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
    // f32 and f64 cases are Legal, f80 case is not
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
  } else if (!UseSoftFloat) {
    if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64; without
      // SSE, we're stuck with a fistpll.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Custom);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSEf64) {
    setOperationAction(ISD::BITCAST        , MVT::f32  , Expand);
    setOperationAction(ISD::BITCAST        , MVT::i32  , Expand);
    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::BITCAST      , MVT::f64  , Expand);
      // Without SSE, i64->f64 goes through memory.
      setOperationAction(ISD::BITCAST      , MVT::i64  , Expand);
    }
  }

  // Scalar integer divide and remainder are lowered to use operations that
  // produce two results, to match the available instructions. This exposes
  // the two-result form to trivial CSE, which is able to combine x/y and x%y
  // into a single instruction.
  //
  // Scalar integer multiply-high is also lowered to use two-result
  // operations, to match the available instructions. However, plain multiply
  // (low) operations are left as Legal, as there are single-result
  // instructions for this in x86. Using the two-result multiply instructions
  // when both high and low results are needed must be arranged by dagcombine.
  for (unsigned i = 0, e = 4; i != e; ++i) {
    MVT VT = IntVTs[i];
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences.
    setOperationAction(ISD::ADDC, VT, Custom);
    setOperationAction(ISD::ADDE, VT, Custom);
    setOperationAction(ISD::SUBC, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Custom);
  }

  setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::FREM             , MVT::f32  , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);
  setOperationAction(ISD::FREM             , MVT::f80  , Expand);
  setOperationAction(ISD::FLT_ROUNDS_      , MVT::i32  , Custom);

  setOperationAction(ISD::CTTZ             , MVT::i8   , Custom);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Custom);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Custom);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Custom);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Custom);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTTZ           , MVT::i64  , Custom);
    setOperationAction(ISD::CTLZ           , MVT::i64  , Custom);
  }

  if (Subtarget->hasPOPCNT()) {
    setOperationAction(ISD::CTPOP          , MVT::i8   , Promote);
  } else {
    setOperationAction(ISD::CTPOP          , MVT::i8   , Expand);
    setOperationAction(ISD::CTPOP          , MVT::i16  , Expand);
    setOperationAction(ISD::CTPOP          , MVT::i32  , Expand);
    if (Subtarget->is64Bit())
      setOperationAction(ISD::CTPOP        , MVT::i64  , Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT          , MVT::i1   , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT          , MVT::i8   , Custom);
  setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f80  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f80  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT        , MVT::i64  , Custom);
    setOperationAction(ISD::SETCC         , MVT::i64  , Custom);
  }
  setOperationAction(ISD::EH_RETURN       , MVT::Other, Custom);

  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
  setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32  , Custom);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
  setOperationAction(ISD::BlockAddress    , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool  , MVT::i64  , Custom);
    setOperationAction(ISD::JumpTable     , MVT::i64  , Custom);
    setOperationAction(ISD::GlobalAddress , MVT::i64  , Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64  , Custom);
    setOperationAction(ISD::BlockAddress  , MVT::i64  , Custom);
  }
  // 64-bit addm sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SHL_PARTS     , MVT::i64  , Custom);
    setOperationAction(ISD::SRA_PARTS     , MVT::i64  , Custom);
    setOperationAction(ISD::SRL_PARTS     , MVT::i64  , Custom);
  }

  if (Subtarget->hasXMM())
    setOperationAction(ISD::PREFETCH      , MVT::Other, Legal);

  // We may not have a libcall for MEMBARRIER so we should lower this.
  setOperationAction(ISD::MEMBARRIER    , MVT::Other, Custom);

  // On X86 and X86-64, atomic operations are lowered to locked instructions.
  // Locked instructions, in turn, have implicit fence semantics (all memory
  // operations are flushed before issuing the locked instruction, and they
  // are not buffered), so we can fold away the common pattern of
  // fence-atomic-fence.
  setShouldFoldAtomicFences(true);

  // Expand certain atomics
  for (unsigned i = 0, e = 4; i != e; ++i) {
    MVT VT = IntVTs[i];
    setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
  }

  if (!Subtarget->is64Bit()) {
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
  }

  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing()) {
    setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
  }

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);

  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::VAARG           , MVT::Other, Custom);
    setOperationAction(ISD::VACOPY          , MVT::Other, Custom);
  } else {
    setOperationAction(ISD::VAARG           , MVT::Other, Expand);
    setOperationAction(ISD::VACOPY          , MVT::Other, Expand);
  }

  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC,
                     (Subtarget->is64Bit() ? MVT::i64 : MVT::i32),
                     (Subtarget->isTargetCOFF()
                      && !Subtarget->isTargetEnvMacho()
                      ? Custom : Expand));

  if (!UseSoftFloat && X86ScalarSSEf64) {
    // f32 and f64 use SSE.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    addLegalFPImmediate(APFloat(+0.0)); // xorpd
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
  } else if (!UseSoftFloat && X86ScalarSSEf32) {
    // Use SSE for f32, x87 for f64.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);

    // Use ANDPS to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    setOperationAction(ISD::UNDEF,     MVT::f64, Expand);

    // Use ANDPS and ORPS to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);

    // Special cases we handle for FP constants.
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
    addLegalFPImmediate(APFloat(+0.0)); // FLD0
    addLegalFPImmediate(APFloat(+1.0)); // FLD1
    addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
      setOperationAction(ISD::FCOS           , MVT::f64  , Expand);
    }
  } else if (!UseSoftFloat) {
    // f32 and f64 in x87.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
    addRegisterClass(MVT::f32, X86::RFP32RegisterClass);

    setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
    setOperationAction(ISD::UNDEF,     MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
      setOperationAction(ISD::FCOS           , MVT::f64  , Expand);
    }
    addLegalFPImmediate(APFloat(+0.0)); // FLD0
    addLegalFPImmediate(APFloat(+1.0)); // FLD1
    addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
    addLegalFPImmediate(APFloat(+0.0f)); // FLD0
    addLegalFPImmediate(APFloat(+1.0f)); // FLD1
    addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
  }

  // Long double always uses X87.
  if (!UseSoftFloat) {
    addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
    setOperationAction(ISD::UNDEF,     MVT::f80, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
    {
      APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
      addLegalFPImmediate(TmpFlt);  // FLD0
      TmpFlt.changeSign();
      addLegalFPImmediate(TmpFlt);  // FLD0/FCHS

      bool ignored;
      APFloat TmpFlt2(+1.0);
      TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
                      &ignored);
      addLegalFPImmediate(TmpFlt2);  // FLD1
      TmpFlt2.changeSign();
      addLegalFPImmediate(TmpFlt2);  // FLD1/FCHS
    }

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN           , MVT::f80  , Expand);
      setOperationAction(ISD::FCOS           , MVT::f80  , Expand);
    }
  }

  // Always use a library call for pow.
  setOperationAction(ISD::FPOW             , MVT::f32  , Expand);
  setOperationAction(ISD::FPOW             , MVT::f64  , Expand);
  setOperationAction(ISD::FPOW             , MVT::f80  , Expand);

  setOperationAction(ISD::FLOG, MVT::f80, Expand);
  setOperationAction(ISD::FLOG2, MVT::f80, Expand);
  setOperationAction(ISD::FLOG10, MVT::f80, Expand);
  setOperationAction(ISD::FEXP, MVT::f80, Expand);
  setOperationAction(ISD::FEXP2, MVT::f80, Expand);

  // First set operation action for all vector types to either promote
  // (for widening) or expand (for scalarization). Then we will selectively
  // turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    setOperationAction(ISD::ADD , (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FNEG, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
    setOperationAction(ISD::INSERT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
    setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SDIVREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UDIVREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::ROTL, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::ROTR, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::VSETCC, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FLOG, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FLOG2, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,Expand);
    setOperationAction(ISD::TRUNCATE,  (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SIGN_EXTEND,  (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::ZERO_EXTEND,  (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::ANY_EXTEND,  (MVT::SimpleValueType)VT, Expand);
    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
      setTruncStoreAction((MVT::SimpleValueType)VT,
                          (MVT::SimpleValueType)InnerVT, Expand);
    setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
  }

  // FIXME: In order to prevent SSE instructions being expanded to MMX ones
  // with -msoft-float, disable use of MMX as well.
  if (!UseSoftFloat && Subtarget->hasMMX()) {
    addRegisterClass(MVT::x86mmx, X86::VR64RegisterClass);
    // No operations on x86mmx supported, everything uses intrinsics.
  }

  // MMX-sized vectors (other than x86mmx) are expected to be expanded
  // into smaller operations.
  setOperationAction(ISD::MULHS,              MVT::v8i8,  Expand);
  setOperationAction(ISD::MULHS,              MVT::v4i16, Expand);
  setOperationAction(ISD::MULHS,              MVT::v2i32, Expand);
  setOperationAction(ISD::MULHS,              MVT::v1i64, Expand);
  setOperationAction(ISD::AND,                MVT::v8i8,  Expand);
  setOperationAction(ISD::AND,                MVT::v4i16, Expand);
  setOperationAction(ISD::AND,                MVT::v2i32, Expand);
  setOperationAction(ISD::AND,                MVT::v1i64, Expand);
  setOperationAction(ISD::OR,                 MVT::v8i8,  Expand);
  setOperationAction(ISD::OR,                 MVT::v4i16, Expand);
  setOperationAction(ISD::OR,                 MVT::v2i32, Expand);
  setOperationAction(ISD::OR,                 MVT::v1i64, Expand);
  setOperationAction(ISD::XOR,                MVT::v8i8,  Expand);
  setOperationAction(ISD::XOR,                MVT::v4i16, Expand);
  setOperationAction(ISD::XOR,                MVT::v2i32, Expand);
  setOperationAction(ISD::XOR,                MVT::v1i64, Expand);
  setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i8,  Expand);
  setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v4i16, Expand);
  setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v2i32, Expand);
  setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v1i64, Expand);
  setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v1i64, Expand);
  setOperationAction(ISD::SELECT,             MVT::v8i8,  Expand);
  setOperationAction(ISD::SELECT,             MVT::v4i16, Expand);
  setOperationAction(ISD::SELECT,             MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT,             MVT::v1i64, Expand);
  setOperationAction(ISD::BITCAST,            MVT::v8i8,  Expand);
  setOperationAction(ISD::BITCAST,            MVT::v4i16, Expand);
  setOperationAction(ISD::BITCAST,            MVT::v2i32, Expand);
  setOperationAction(ISD::BITCAST,            MVT::v1i64, Expand);

  if (!UseSoftFloat && Subtarget->hasXMM()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB,               MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL,               MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT,              MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG,               MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
    setOperationAction(ISD::VSETCC,             MVT::v4f32, Custom);
  }

  if (!UseSoftFloat && Subtarget->hasXMMInt()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);

    // FIXME: Unfortunately -soft-float and -no-implicit-float means XMM
    // registers cannot be used even for integer operations.
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
    setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
    setOperationAction(ISD::ADD,                MVT::v2i64, Legal);
    setOperationAction(ISD::MUL,                MVT::v2i64, Custom);
    setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
    setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
    setOperationAction(ISD::SUB,                MVT::v2i64, Legal);
    setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
    setOperationAction(ISD::FADD,               MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB,               MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL,               MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV,               MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT,              MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG,               MVT::v2f64, Custom);

    setOperationAction(ISD::VSETCC,             MVT::v2f64, Custom);
    setOperationAction(ISD::VSETCC,             MVT::v16i8, Custom);
    setOperationAction(ISD::VSETCC,             MVT::v8i16, Custom);
    setOperationAction(ISD::VSETCC,             MVT::v4i32, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);

    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v2f64, Custom);
    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v2i64, Custom);
    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16i8, Custom);
    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8i16, Custom);
    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v4i32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) {
      EVT VT = (MVT::SimpleValueType)i;
      // Do not attempt to custom lower non-power-of-2 vectors
      if (!isPowerOf2_32(VT.getVectorNumElements()))
        continue;
      // Do not attempt to custom lower non-128-bit vectors
      if (!VT.is128BitVector())
        continue;
      setOperationAction(ISD::BUILD_VECTOR,
                         VT.getSimpleVT().SimpleTy, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,
                         VT.getSimpleVT().SimpleTy, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT,
                         VT.getSimpleVT().SimpleTy, Custom);
    }

    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);

    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
    }

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; i++) {
      MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
      EVT VT = SVT;

      // Do not attempt to promote non-128-bit vectors
      if (!VT.is128BitVector())
        continue;

      setOperationAction(ISD::AND,    SVT, Promote);
      AddPromotedToType (ISD::AND,    SVT, MVT::v2i64);
      setOperationAction(ISD::OR,     SVT, Promote);
      AddPromotedToType (ISD::OR,     SVT, MVT::v2i64);
      setOperationAction(ISD::XOR,    SVT, Promote);
      AddPromotedToType (ISD::XOR,    SVT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   SVT, Promote);
      AddPromotedToType (ISD::LOAD,   SVT, MVT::v2i64);
      setOperationAction(ISD::SELECT, SVT, Promote);
      AddPromotedToType (ISD::SELECT, SVT, MVT::v2i64);
    }

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);

    setOperationAction(ISD::FP_TO_SINT,         MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP,         MVT::v4i32, Legal);
  }

  if (Subtarget->hasSSE41()) {
    setOperationAction(ISD::FFLOOR,             MVT::f32,   Legal);
    setOperationAction(ISD::FCEIL,              MVT::f32,   Legal);
    setOperationAction(ISD::FTRUNC,             MVT::f32,   Legal);
    setOperationAction(ISD::FRINT,              MVT::f32,   Legal);
    setOperationAction(ISD::FNEARBYINT,         MVT::f32,   Legal);
    setOperationAction(ISD::FFLOOR,             MVT::f64,   Legal);
    setOperationAction(ISD::FCEIL,              MVT::f64,   Legal);
    setOperationAction(ISD::FTRUNC,             MVT::f64,   Legal);
    setOperationAction(ISD::FRINT,              MVT::f64,   Legal);
    setOperationAction(ISD::FNEARBYINT,         MVT::f64,   Legal);

    // FIXME: Do we need to handle scalar-to-vector here?
    setOperationAction(ISD::MUL,                MVT::v4i32, Legal);

    // Can turn SHL into an integer multiply.
    setOperationAction(ISD::SHL,                MVT::v4i32, Custom);
    setOperationAction(ISD::SHL,                MVT::v16i8, Custom);

    // i8 and i16 vectors are custom , because the source register and source
    // source memory operand types are not the same width.  f32 vectors are
    // custom since the immediate controlling the insert encodes additional
    // information.
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i8, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
    }
  }

  if (Subtarget->hasSSE2()) {
    setOperationAction(ISD::SRL,               MVT::v2i64, Custom);
    setOperationAction(ISD::SRL,               MVT::v4i32, Custom);
    setOperationAction(ISD::SRL,               MVT::v16i8, Custom);

    setOperationAction(ISD::SHL,               MVT::v2i64, Custom);
    setOperationAction(ISD::SHL,               MVT::v4i32, Custom);
    setOperationAction(ISD::SHL,               MVT::v8i16, Custom);

    setOperationAction(ISD::SRA,               MVT::v4i32, Custom);
    setOperationAction(ISD::SRA,               MVT::v8i16, Custom);
  }

  if (Subtarget->hasSSE42())
    setOperationAction(ISD::VSETCC,             MVT::v2i64, Custom);

  if (!UseSoftFloat && Subtarget->hasAVX()) {
    addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
    addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
    addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
    addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);
    addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);

    setOperationAction(ISD::LOAD,               MVT::v8f32, Legal);
    setOperationAction(ISD::LOAD,               MVT::v8i32, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4f64, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4i64, Legal);

    setOperationAction(ISD::FADD,               MVT::v8f32, Legal);
    setOperationAction(ISD::FSUB,               MVT::v8f32, Legal);
    setOperationAction(ISD::FMUL,               MVT::v8f32, Legal);
    setOperationAction(ISD::FDIV,               MVT::v8f32, Legal);
    setOperationAction(ISD::FSQRT,              MVT::v8f32, Legal);
    setOperationAction(ISD::FNEG,               MVT::v8f32, Custom);

    setOperationAction(ISD::FADD,               MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB,               MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL,               MVT::v4f64, Legal);
    setOperationAction(ISD::FDIV,               MVT::v4f64, Legal);
    setOperationAction(ISD::FSQRT,              MVT::v4f64, Legal);
    setOperationAction(ISD::FNEG,               MVT::v4f64, Custom);

    // Custom lower build_vector, vector_shuffle, scalar_to_vector,
    // insert_vector_elt extract_subvector and extract_vector_elt for
    // 256-bit types.
    for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE;
         ++i) {
      MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
      // Do not attempt to custom lower non-256-bit vectors