X86InstrInfo.cpp

  if (MI != MBB.end()) DL = MI->getDebugLoc();
  addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
}

void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
                                 SmallVectorImpl<MachineOperand> &Addr,
                                 const TargetRegisterClass *RC,
                                 SmallVectorImpl<MachineInstr*> &NewMIs) const {
  bool isAligned = (RI.getStackAlignment() >= 16) ||
    RI.needsStackRealignment(MF);
  unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
  DebugLoc DL = DebugLoc::getUnknownLoc();
  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
    MIB.addOperand(Addr[i]);
  NewMIs.push_back(MIB);
}

bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MI,
                                const std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return false;

  DebugLoc DL = DebugLoc::getUnknownLoc();
  if (MI != MBB.end()) DL = MI->getDebugLoc();

  bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
  unsigned SlotSize = is64Bit ? 8 : 4;

  MachineFunction &MF = *MBB.getParent();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  unsigned CalleeFrameSize = 0;
  
  unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i-1].getReg();
    const TargetRegisterClass *RegClass = CSI[i-1].getRegClass();
    // Add the callee-saved register as live-in. It's killed at the spill.
    MBB.addLiveIn(Reg);
    if (RegClass != &X86::VR128RegClass) {
      CalleeFrameSize += SlotSize;
      BuildMI(MBB, MI, DL, get(Opc))
        .addReg(Reg, RegState::Kill);
    } else {
      storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass);
    }
  }

  X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
  return true;
}

bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MI,
                                const std::vector<CalleeSavedInfo> &CSI) const {
  if (CSI.empty())
    return false;

  DebugLoc DL = DebugLoc::getUnknownLoc();
  if (MI != MBB.end()) DL = MI->getDebugLoc();

  bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();

  unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r;
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    const TargetRegisterClass *RegClass = CSI[i].getRegClass();
    if (RegClass != &X86::VR128RegClass) {
      BuildMI(MBB, MI, DL, get(Opc), Reg);
    } else {
      loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass);
    }
  }
  return true;
}

static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode,
                                     const SmallVectorImpl<MachineOperand> &MOs,
                                     MachineInstr *MI,
                                     const TargetInstrInfo &TII) {
  // Create the base instruction with the memory operand as the first part.
  MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
                                              MI->getDebugLoc(), true);
  MachineInstrBuilder MIB(NewMI);
  unsigned NumAddrOps = MOs.size();
  for (unsigned i = 0; i != NumAddrOps; ++i)
    MIB.addOperand(MOs[i]);
  if (NumAddrOps < 4)  // FrameIndex only
    addOffset(MIB, 0);
  
  // Loop over the rest of the ri operands, converting them over.
  unsigned NumOps = MI->getDesc().getNumOperands()-2;
  for (unsigned i = 0; i != NumOps; ++i) {
    MachineOperand &MO = MI->getOperand(i+2);
    MIB.addOperand(MO);
  }
  for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    MIB.addOperand(MO);
  }
  return MIB;
}

static MachineInstr *FuseInst(MachineFunction &MF,
                              unsigned Opcode, unsigned OpNo,
                              const SmallVectorImpl<MachineOperand> &MOs,
                              MachineInstr *MI, const TargetInstrInfo &TII) {
  MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode),
                                              MI->getDebugLoc(), true);
  MachineInstrBuilder MIB(NewMI);
  
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (i == OpNo) {
      assert(MO.isReg() && "Expected to fold into reg operand!");
      unsigned NumAddrOps = MOs.size();
      for (unsigned i = 0; i != NumAddrOps; ++i)
        MIB.addOperand(MOs[i]);
      if (NumAddrOps < 4)  // FrameIndex only
        addOffset(MIB, 0);
    } else {
      MIB.addOperand(MO);
    }
  }
  return MIB;
}

static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
                                const SmallVectorImpl<MachineOperand> &MOs,
                                MachineInstr *MI) {
  MachineFunction &MF = *MI->getParent()->getParent();
  MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode));

  unsigned NumAddrOps = MOs.size();
  for (unsigned i = 0; i != NumAddrOps; ++i)
    MIB.addOperand(MOs[i]);
  if (NumAddrOps < 4)  // FrameIndex only
    addOffset(MIB, 0);
  return MIB.addImm(0);
}

MachineInstr*
X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                    MachineInstr *MI, unsigned i,
                                    const SmallVectorImpl<MachineOperand> &MOs) const{
  const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
  bool isTwoAddrFold = false;
  unsigned NumOps = MI->getDesc().getNumOperands();
  bool isTwoAddr = NumOps > 1 &&
    MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;

  MachineInstr *NewMI = NULL;
  // Folding a memory location into the two-address part of a two-address
  // instruction is different than folding it other places.  It requires
  // replacing the *two* registers with the memory location.
  if (isTwoAddr && NumOps >= 2 && i < 2 &&
      MI->getOperand(0).isReg() &&
      MI->getOperand(1).isReg() &&
      MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { 
    OpcodeTablePtr = &RegOp2MemOpTable2Addr;
    isTwoAddrFold = true;
  } else if (i == 0) { // If operand 0
    if (MI->getOpcode() == X86::MOV16r0)
      NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI);
    else if (MI->getOpcode() == X86::MOV32r0)
      NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI);
    else if (MI->getOpcode() == X86::MOV64r0)
      NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI);
    else if (MI->getOpcode() == X86::MOV8r0)
      NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI);
    if (NewMI)
      return NewMI;
    
    OpcodeTablePtr = &RegOp2MemOpTable0;
  } else if (i == 1) {
    OpcodeTablePtr = &RegOp2MemOpTable1;
  } else if (i == 2) {
    OpcodeTablePtr = &RegOp2MemOpTable2;
  }
  
  // If table selected...
  if (OpcodeTablePtr) {
    // Find the Opcode to fuse
    DenseMap<unsigned*, unsigned>::iterator I =
      OpcodeTablePtr->find((unsigned*)MI->getOpcode());
    if (I != OpcodeTablePtr->end()) {
      if (isTwoAddrFold)
        NewMI = FuseTwoAddrInst(MF, I->second, MOs, MI, *this);
      else
        NewMI = FuseInst(MF, I->second, i, MOs, MI, *this);
      return NewMI;
    }
  }
  
  // No fusion 
  if (PrintFailedFusing)
    cerr << "We failed to fuse operand " << i << " in " << *MI;
  return NULL;
}


MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                                  MachineInstr *MI,
                                                  const SmallVectorImpl<unsigned> &Ops,
                                                  int FrameIndex) const {
  // Check switch flag 
  if (NoFusing) return NULL;

  const MachineFrameInfo *MFI = MF.getFrameInfo();
  unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
  // FIXME: Move alignment requirement into tables?
  if (Alignment < 16) {
    switch (MI->getOpcode()) {
    default: break;
    // Not always safe to fold movsd into these instructions since their load
    // folding variants expects the address to be 16 byte aligned.
    case X86::FsANDNPDrr:
    case X86::FsANDNPSrr:
    case X86::FsANDPDrr:
    case X86::FsANDPSrr:
    case X86::FsORPDrr:
    case X86::FsORPSrr:
    case X86::FsXORPDrr:
    case X86::FsXORPSrr:
      return NULL;
    }
  }

  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
    unsigned NewOpc = 0;
    switch (MI->getOpcode()) {
    default: return NULL;
    case X86::TEST8rr:  NewOpc = X86::CMP8ri; break;
    case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
    case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
    case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
    }
    // Change to CMPXXri r, 0 first.
    MI->setDesc(get(NewOpc));
    MI->getOperand(1).ChangeToImmediate(0);
  } else if (Ops.size() != 1)
    return NULL;

  SmallVector<MachineOperand,4> MOs;
  MOs.push_back(MachineOperand::CreateFI(FrameIndex));
  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs);
}

MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                                  MachineInstr *MI,
                                            const SmallVectorImpl<unsigned> &Ops,
                                                  MachineInstr *LoadMI) const {
  // Check switch flag 
  if (NoFusing) return NULL;

  // Determine the alignment of the load.
  unsigned Alignment = 0;
  if (LoadMI->hasOneMemOperand())
    Alignment = LoadMI->memoperands_begin()->getAlignment();

  // FIXME: Move alignment requirement into tables?
  if (Alignment < 16) {
    switch (MI->getOpcode()) {
    default: break;
    // Not always safe to fold movsd into these instructions since their load
    // folding variants expects the address to be 16 byte aligned.
    case X86::FsANDNPDrr:
    case X86::FsANDNPSrr:
    case X86::FsANDPDrr:
    case X86::FsANDPSrr:
    case X86::FsORPDrr:
    case X86::FsORPSrr:
    case X86::FsXORPDrr:
    case X86::FsXORPSrr:
      return NULL;
    }
  }

  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
    unsigned NewOpc = 0;
    switch (MI->getOpcode()) {
    default: return NULL;
    case X86::TEST8rr:  NewOpc = X86::CMP8ri; break;
    case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
    case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
    case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
    }
    // Change to CMPXXri r, 0 first.
    MI->setDesc(get(NewOpc));
    MI->getOperand(1).ChangeToImmediate(0);
  } else if (Ops.size() != 1)
    return NULL;

  SmallVector<MachineOperand,X86AddrNumOperands> MOs;
  if (LoadMI->getOpcode() == X86::V_SET0 ||
      LoadMI->getOpcode() == X86::V_SETALLONES) {
    // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
    // Create a constant-pool entry and operands to load from it.

    // x86-32 PIC requires a PIC base register for constant pools.
    unsigned PICBase = 0;
    if (TM.getRelocationModel() == Reloc::PIC_ &&
        !TM.getSubtarget<X86Subtarget>().is64Bit())
      // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF);
      // This doesn't work for several reasons.
      // 1. GlobalBaseReg may have been spilled.
      // 2. It may not be live at MI.
      return false;

    // Create a v4i32 constant-pool entry.
    MachineConstantPool &MCP = *MF.getConstantPool();
    const VectorType *Ty = VectorType::get(Type::Int32Ty, 4);
    Constant *C = LoadMI->getOpcode() == X86::V_SET0 ?
                    ConstantVector::getNullValue(Ty) :
                    ConstantVector::getAllOnesValue(Ty);
    unsigned CPI = MCP.getConstantPoolIndex(C, 16);

    // Create operands to load from the constant pool entry.
    MOs.push_back(MachineOperand::CreateReg(PICBase, false));
    MOs.push_back(MachineOperand::CreateImm(1));
    MOs.push_back(MachineOperand::CreateReg(0, false));
    MOs.push_back(MachineOperand::CreateCPI(CPI, 0));
    MOs.push_back(MachineOperand::CreateReg(0, false));
  } else {
    // Folding a normal load. Just copy the load's address operands.
    unsigned NumOps = LoadMI->getDesc().getNumOperands();
    for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i)
      MOs.push_back(LoadMI->getOperand(i));
  }
  return foldMemoryOperandImpl(MF, MI, Ops[0], MOs);
}


bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
                                  const SmallVectorImpl<unsigned> &Ops) const {
  // Check switch flag 
  if (NoFusing) return 0;

  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
    switch (MI->getOpcode()) {
    default: return false;
    case X86::TEST8rr: 
    case X86::TEST16rr:
    case X86::TEST32rr:
    case X86::TEST64rr:
      return true;
    }
  }

  if (Ops.size() != 1)
    return false;

  unsigned OpNum = Ops[0];
  unsigned Opc = MI->getOpcode();
  unsigned NumOps = MI->getDesc().getNumOperands();
  bool isTwoAddr = NumOps > 1 &&
    MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;

  // Folding a memory location into the two-address part of a two-address
  // instruction is different than folding it other places.  It requires
  // replacing the *two* registers with the memory location.
  const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
  if (isTwoAddr && NumOps >= 2 && OpNum < 2) { 
    OpcodeTablePtr = &RegOp2MemOpTable2Addr;
  } else if (OpNum == 0) { // If operand 0
    switch (Opc) {
    case X86::MOV16r0:
    case X86::MOV32r0:
    case X86::MOV64r0:
    case X86::MOV8r0:
      return true;
    default: break;
    }
    OpcodeTablePtr = &RegOp2MemOpTable0;
  } else if (OpNum == 1) {
    OpcodeTablePtr = &RegOp2MemOpTable1;
  } else if (OpNum == 2) {
    OpcodeTablePtr = &RegOp2MemOpTable2;
  }
  
  if (OpcodeTablePtr) {
    // Find the Opcode to fuse
    DenseMap<unsigned*, unsigned>::iterator I =
      OpcodeTablePtr->find((unsigned*)Opc);
    if (I != OpcodeTablePtr->end())
      return true;
  }
  return false;
}

bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                                unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
                                SmallVectorImpl<MachineInstr*> &NewMIs) const {
  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
    MemOp2RegOpTable.find((unsigned*)MI->getOpcode());
  if (I == MemOp2RegOpTable.end())
    return false;
  DebugLoc dl = MI->getDebugLoc();
  unsigned Opc = I->second.first;
  unsigned Index = I->second.second & 0xf;
  bool FoldedLoad = I->second.second & (1 << 4);
  bool FoldedStore = I->second.second & (1 << 5);
  if (UnfoldLoad && !FoldedLoad)
    return false;
  UnfoldLoad &= FoldedLoad;
  if (UnfoldStore && !FoldedStore)
    return false;
  UnfoldStore &= FoldedStore;

  const TargetInstrDesc &TID = get(Opc);
  const TargetOperandInfo &TOI = TID.OpInfo[Index];
  const TargetRegisterClass *RC = TOI.isLookupPtrRegClass()
    ? RI.getPointerRegClass() : RI.getRegClass(TOI.RegClass);
  SmallVector<MachineOperand, X86AddrNumOperands> AddrOps;
  SmallVector<MachineOperand,2> BeforeOps;
  SmallVector<MachineOperand,2> AfterOps;
  SmallVector<MachineOperand,4> ImpOps;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &Op = MI->getOperand(i);
    if (i >= Index && i < Index + X86AddrNumOperands)
      AddrOps.push_back(Op);
    else if (Op.isReg() && Op.isImplicit())
      ImpOps.push_back(Op);
    else if (i < Index)
      BeforeOps.push_back(Op);
    else if (i > Index)
      AfterOps.push_back(Op);
  }

  // Emit the load instruction.
  if (UnfoldLoad) {
    loadRegFromAddr(MF, Reg, AddrOps, RC, NewMIs);
    if (UnfoldStore) {
      // Address operands cannot be marked isKill.
      for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) {
        MachineOperand &MO = NewMIs[0]->getOperand(i);
        if (MO.isReg())
          MO.setIsKill(false);
      }
    }
  }

  // Emit the data processing instruction.
  MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true);
  MachineInstrBuilder MIB(DataMI);
  
  if (FoldedStore)
    MIB.addReg(Reg, RegState::Define);
  for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
    MIB.addOperand(BeforeOps[i]);
  if (FoldedLoad)
    MIB.addReg(Reg);
  for (unsigned i = 0, e = AfterOps.size(); i != e; ++i)
    MIB.addOperand(AfterOps[i]);
  for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) {
    MachineOperand &MO = ImpOps[i];
    MIB.addReg(MO.getReg(),
               getDefRegState(MO.isDef()) |
               RegState::Implicit |
               getKillRegState(MO.isKill()) |
               getDeadRegState(MO.isDead()) |
               getUndefRegState(MO.isUndef()));
  }
  // Change CMP32ri r, 0 back to TEST32rr r, r, etc.
  unsigned NewOpc = 0;
  switch (DataMI->getOpcode()) {
  default: break;
  case X86::CMP64ri32:
  case X86::CMP32ri:
  case X86::CMP16ri:
  case X86::CMP8ri: {
    MachineOperand &MO0 = DataMI->getOperand(0);
    MachineOperand &MO1 = DataMI->getOperand(1);
    if (MO1.getImm() == 0) {
      switch (DataMI->getOpcode()) {
      default: break;
      case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
      case X86::CMP32ri:   NewOpc = X86::TEST32rr; break;
      case X86::CMP16ri:   NewOpc = X86::TEST16rr; break;
      case X86::CMP8ri:    NewOpc = X86::TEST8rr; break;
      }
      DataMI->setDesc(get(NewOpc));
      MO1.ChangeToRegister(MO0.getReg(), false);
    }
  }
  }
  NewMIs.push_back(DataMI);

  // Emit the store instruction.
  if (UnfoldStore) {
    const TargetOperandInfo &DstTOI = TID.OpInfo[0];
    const TargetRegisterClass *DstRC = DstTOI.isLookupPtrRegClass()
      ? RI.getPointerRegClass() : RI.getRegClass(DstTOI.RegClass);
    storeRegToAddr(MF, Reg, true, AddrOps, DstRC, NewMIs);
  }

  return true;
}

bool
X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
                                  SmallVectorImpl<SDNode*> &NewNodes) const {
  if (!N->isMachineOpcode())
    return false;

  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
    MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode());
  if (I == MemOp2RegOpTable.end())
    return false;
  unsigned Opc = I->second.first;
  unsigned Index = I->second.second & 0xf;
  bool FoldedLoad = I->second.second & (1 << 4);
  bool FoldedStore = I->second.second & (1 << 5);
  const TargetInstrDesc &TID = get(Opc);
  const TargetOperandInfo &TOI = TID.OpInfo[Index];
  const TargetRegisterClass *RC = TOI.isLookupPtrRegClass()
    ? RI.getPointerRegClass() : RI.getRegClass(TOI.RegClass);
  unsigned NumDefs = TID.NumDefs;
  std::vector<SDValue> AddrOps;
  std::vector<SDValue> BeforeOps;
  std::vector<SDValue> AfterOps;
  DebugLoc dl = N->getDebugLoc();
  unsigned NumOps = N->getNumOperands();
  for (unsigned i = 0; i != NumOps-1; ++i) {
    SDValue Op = N->getOperand(i);
    if (i >= Index-NumDefs && i < Index-NumDefs + X86AddrNumOperands)
      AddrOps.push_back(Op);
    else if (i < Index-NumDefs)
      BeforeOps.push_back(Op);
    else if (i > Index-NumDefs)
      AfterOps.push_back(Op);
  }
  SDValue Chain = N->getOperand(NumOps-1);
  AddrOps.push_back(Chain);

  // Emit the load instruction.
  SDNode *Load = 0;
  const MachineFunction &MF = DAG.getMachineFunction();
  if (FoldedLoad) {
    MVT VT = *RC->vt_begin();
    bool isAligned = (RI.getStackAlignment() >= 16) ||
      RI.needsStackRealignment(MF);
    Load = DAG.getTargetNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
                             VT, MVT::Other, &AddrOps[0], AddrOps.size());
    NewNodes.push_back(Load);
  }

  // Emit the data processing instruction.
  std::vector<MVT> VTs;
  const TargetRegisterClass *DstRC = 0;
  if (TID.getNumDefs() > 0) {
    const TargetOperandInfo &DstTOI = TID.OpInfo[0];
    DstRC = DstTOI.isLookupPtrRegClass()
      ? RI.getPointerRegClass() : RI.getRegClass(DstTOI.RegClass);
    VTs.push_back(*DstRC->vt_begin());
  }
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    MVT VT = N->getValueType(i);
    if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs())
      VTs.push_back(VT);
  }
  if (Load)
    BeforeOps.push_back(SDValue(Load, 0));
  std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps));
  SDNode *NewNode= DAG.getTargetNode(Opc, dl, VTs, &BeforeOps[0],
                                     BeforeOps.size());
  NewNodes.push_back(NewNode);

  // Emit the store instruction.
  if (FoldedStore) {
    AddrOps.pop_back();
    AddrOps.push_back(SDValue(NewNode, 0));
    AddrOps.push_back(Chain);
    bool isAligned = (RI.getStackAlignment() >= 16) ||
      RI.needsStackRealignment(MF);
    SDNode *Store = DAG.getTargetNode(getStoreRegOpcode(0, DstRC,
                                                        isAligned, TM),
                                      dl, MVT::Other,
                                      &AddrOps[0], AddrOps.size());
    NewNodes.push_back(Store);
  }

  return true;
}

unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
                                      bool UnfoldLoad, bool UnfoldStore) const {
  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
    MemOp2RegOpTable.find((unsigned*)Opc);
  if (I == MemOp2RegOpTable.end())
    return 0;
  bool FoldedLoad = I->second.second & (1 << 4);
  bool FoldedStore = I->second.second & (1 << 5);
  if (UnfoldLoad && !FoldedLoad)
    return 0;
  if (UnfoldStore && !FoldedStore)
    return 0;
  return I->second.first;
}

bool X86InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
  if (MBB.empty()) return false;
  
  switch (MBB.back().getOpcode()) {
  case X86::TCRETURNri:
  case X86::TCRETURNdi:
  case X86::RET:     // Return.
  case X86::RETI:
  case X86::TAILJMPd:
  case X86::TAILJMPr:
  case X86::TAILJMPm:
  case X86::JMP:     // Uncond branch.
  case X86::JMP32r:  // Indirect branch.
  case X86::JMP64r:  // Indirect branch (64-bit).
  case X86::JMP32m:  // Indirect branch through mem.
  case X86::JMP64m:  // Indirect branch through mem (64-bit).
    return true;
  default: return false;
  }
}

bool X86InstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  assert(Cond.size() == 1 && "Invalid X86 branch condition!");
  X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
  if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E)
    return true;
  Cond[0].setImm(GetOppositeBranchCondition(CC));
  return false;
}

bool X86InstrInfo::
isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
  // FIXME: Return false for x87 stack register classes for now. We can't
  // allow any loads of these registers before FpGet_ST0_80.
  return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass ||
           RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
}

unsigned X86InstrInfo::sizeOfImm(const TargetInstrDesc *Desc) {
  switch (Desc->TSFlags & X86II::ImmMask) {
  case X86II::Imm8:   return 1;
  case X86II::Imm16:  return 2;
  case X86II::Imm32:  return 4;
  case X86II::Imm64:  return 8;
  default: assert(0 && "Immediate size not set!");
    return 0;
  }
}

/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended register?
/// e.g. r8, xmm8, etc.
bool X86InstrInfo::isX86_64ExtendedReg(const MachineOperand &MO) {
  if (!MO.isReg()) return false;
  switch (MO.getReg()) {
  default: break;
  case X86::R8:    case X86::R9:    case X86::R10:   case X86::R11:
  case X86::R12:   case X86::R13:   case X86::R14:   case X86::R15:
  case X86::R8D:   case X86::R9D:   case X86::R10D:  case X86::R11D:
  case X86::R12D:  case X86::R13D:  case X86::R14D:  case X86::R15D:
  case X86::R8W:   case X86::R9W:   case X86::R10W:  case X86::R11W:
  case X86::R12W:  case X86::R13W:  case X86::R14W:  case X86::R15W:
  case X86::R8B:   case X86::R9B:   case X86::R10B:  case X86::R11B:
  case X86::R12B:  case X86::R13B:  case X86::R14B:  case X86::R15B:
  case X86::XMM8:  case X86::XMM9:  case X86::XMM10: case X86::XMM11:
  case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
    return true;
  }
  return false;
}


/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64
/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
/// size, and 3) use of X86-64 extended registers.
unsigned X86InstrInfo::determineREX(const MachineInstr &MI) {
  unsigned REX = 0;
  const TargetInstrDesc &Desc = MI.getDesc();

  // Pseudo instructions do not need REX prefix byte.
  if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
    return 0;
  if (Desc.TSFlags & X86II::REX_W)
    REX |= 1 << 3;

  unsigned NumOps = Desc.getNumOperands();
  if (NumOps) {
    bool isTwoAddr = NumOps > 1 &&
      Desc.getOperandConstraint(1, TOI::TIED_TO) != -1;

    // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
    unsigned i = isTwoAddr ? 1 : 0;
    for (unsigned e = NumOps; i != e; ++i) {
      const MachineOperand& MO = MI.getOperand(i);
      if (MO.isReg()) {
        unsigned Reg = MO.getReg();
        if (isX86_64NonExtLowByteReg(Reg))
          REX |= 0x40;
      }
    }

    switch (Desc.TSFlags & X86II::FormMask) {
    case X86II::MRMInitReg:
      if (isX86_64ExtendedReg(MI.getOperand(0)))
        REX |= (1 << 0) | (1 << 2);
      break;
    case X86II::MRMSrcReg: {
      if (isX86_64ExtendedReg(MI.getOperand(0)))
        REX |= 1 << 2;
      i = isTwoAddr ? 2 : 1;
      for (unsigned e = NumOps; i != e; ++i) {
        const MachineOperand& MO = MI.getOperand(i);
        if (isX86_64ExtendedReg(MO))
          REX |= 1 << 0;
      }
      break;
    }
    case X86II::MRMSrcMem: {
      if (isX86_64ExtendedReg(MI.getOperand(0)))
        REX |= 1 << 2;
      unsigned Bit = 0;
      i = isTwoAddr ? 2 : 1;
      for (; i != NumOps; ++i) {
        const MachineOperand& MO = MI.getOperand(i);
        if (MO.isReg()) {
          if (isX86_64ExtendedReg(MO))
            REX |= 1 << Bit;
          Bit++;
        }
      }
      break;
    }
    case X86II::MRM0m: case X86II::MRM1m:
    case X86II::MRM2m: case X86II::MRM3m:
    case X86II::MRM4m: case X86II::MRM5m:
    case X86II::MRM6m: case X86II::MRM7m:
    case X86II::MRMDestMem: {
      unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands);
      i = isTwoAddr ? 1 : 0;
      if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e)))
        REX |= 1 << 2;
      unsigned Bit = 0;
      for (; i != e; ++i) {
        const MachineOperand& MO = MI.getOperand(i);
        if (MO.isReg()) {
          if (isX86_64ExtendedReg(MO))
            REX |= 1 << Bit;
          Bit++;
        }
      }
      break;
    }
    default: {
      if (isX86_64ExtendedReg(MI.getOperand(0)))
        REX |= 1 << 0;
      i = isTwoAddr ? 2 : 1;
      for (unsigned e = NumOps; i != e; ++i) {
        const MachineOperand& MO = MI.getOperand(i);
        if (isX86_64ExtendedReg(MO))
          REX |= 1 << 2;
      }
      break;
    }
    }
  }
  return REX;
}

/// sizePCRelativeBlockAddress - This method returns the size of a PC
/// relative block address instruction
///
static unsigned sizePCRelativeBlockAddress() {
  return 4;
}

/// sizeGlobalAddress - Give the size of the emission of this global address
///
static unsigned sizeGlobalAddress(bool dword) {
  return dword ? 8 : 4;
}

/// sizeConstPoolAddress - Give the size of the emission of this constant
/// pool address
///
static unsigned sizeConstPoolAddress(bool dword) {
  return dword ? 8 : 4;
}

/// sizeExternalSymbolAddress - Give the size of the emission of this external
/// symbol
///
static unsigned sizeExternalSymbolAddress(bool dword) {
  return dword ? 8 : 4;
}

/// sizeJumpTableAddress - Give the size of the emission of this jump
/// table address
///
static unsigned sizeJumpTableAddress(bool dword) {
  return dword ? 8 : 4;
}

static unsigned sizeConstant(unsigned Size) {
  return Size;
}

static unsigned sizeRegModRMByte(){
  return 1;
}

static unsigned sizeSIBByte(){
  return 1;
}

static unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) {
  unsigned FinalSize = 0;
  // If this is a simple integer displacement that doesn't require a relocation.
  if (!RelocOp) {
    FinalSize += sizeConstant(4);
    return FinalSize;
  }
  
  // Otherwise, this is something that requires a relocation.
  if (RelocOp->isGlobal()) {
    FinalSize += sizeGlobalAddress(false);
  } else if (RelocOp->isCPI()) {
    FinalSize += sizeConstPoolAddress(false);
  } else if (RelocOp->isJTI()) {
    FinalSize += sizeJumpTableAddress(false);
  } else {
    assert(0 && "Unknown value to relocate!");
  }
  return FinalSize;
}

static unsigned getMemModRMByteSize(const MachineInstr &MI, unsigned Op,
                                    bool IsPIC, bool Is64BitMode) {
  const MachineOperand &Op3 = MI.getOperand(Op+3);
  int DispVal = 0;
  const MachineOperand *DispForReloc = 0;
  unsigned FinalSize = 0;
  
  // Figure out what sort of displacement we have to handle here.
  if (Op3.isGlobal()) {
    DispForReloc = &Op3;
  } else if (Op3.isCPI()) {
    if (Is64BitMode || IsPIC) {
      DispForReloc = &Op3;
    } else {
      DispVal = 1;
    }
  } else if (Op3.isJTI()) {
    if (Is64BitMode || IsPIC) {
      DispForReloc = &Op3;
    } else {
      DispVal = 1; 
    }
  } else {
    DispVal = 1;
  }

  const MachineOperand &Base     = MI.getOperand(Op);
  const MachineOperand &IndexReg = MI.getOperand(Op+2);

  unsigned BaseReg = Base.getReg();

  // Is a SIB byte needed?
  if ((!Is64BitMode || DispForReloc || BaseReg != 0) &&
      IndexReg.getReg() == 0 &&
      (BaseReg == 0 || X86RegisterInfo::getX86RegNum(BaseReg) != N86::ESP)) {      
    if (BaseReg == 0) {  // Just a displacement?
      // Emit special case [disp32] encoding
      ++FinalSize; 
      FinalSize += getDisplacementFieldSize(DispForReloc);
    } else {
      unsigned BaseRegNo = X86RegisterInfo::getX86RegNum(BaseReg);
      if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
        // Emit simple indirect register encoding... [EAX] f.e.
        ++FinalSize;
      // Be pessimistic and assume it's a disp32, not a disp8
      } else {
        // Emit the most general non-SIB encoding: [REG+disp32]
        ++FinalSize;
        FinalSize += getDisplacementFieldSize(DispForReloc);
      }
    }

  } else {  // We need a SIB byte, so start by outputting the ModR/M byte first
    assert(IndexReg.getReg() != X86::ESP &&
           IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");

    bool ForceDisp32 = false;
    if (BaseReg == 0 || DispForReloc) {
      // Emit the normal disp32 encoding.
      ++FinalSize;
      ForceDisp32 = true;
    } else {
      ++FinalSize;
    }

    FinalSize += sizeSIBByte();

    // Do we need to output a displacement?
    if (DispVal != 0 || ForceDisp32) {
      FinalSize += getDisplacementFieldSize(DispForReloc);
    }
  }
  return FinalSize;
}


static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
                                    const TargetInstrDesc *Desc,
                                    bool IsPIC, bool Is64BitMode) {
  
  unsigned Opcode = Desc->Opcode;
  unsigned FinalSize = 0;

  // Emit the lock opcode prefix as needed.
  if (Desc->TSFlags & X86II::LOCK) ++FinalSize;

  // Emit segment override opcode prefix as needed.
  switch (Desc->TSFlags & X86II::SegOvrMask) {
  case X86II::FS:
  case X86II::GS:
   ++FinalSize;
   break;
  default: assert(0 && "Invalid segment!");
  case 0: break;  // No segment override!
  }

  // Emit the repeat opcode prefix as needed.
  if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) ++FinalSize;

  // Emit the operand size opcode prefix as needed.
  if (Desc->TSFlags & X86II::OpSize) ++FinalSize;

  // Emit the address size opcode prefix as needed.
  if (Desc->TSFlags & X86II::AdSize) ++FinalSize;

  bool Need0FPrefix = false;
  switch (Desc->TSFlags & X86II::Op0Mask) {
  case X86II::TB:  // Two-byte opcode prefix
  case X86II::T8:  // 0F 38
  case X86II::TA:  // 0F 3A
    Need0FPrefix = true;
    break;
  case X86II::REP: break; // already handled.
  case X86II::XS:   // F3 0F
    ++FinalSize;
    Need0FPrefix = true;
    break;
  case X86II::XD:   // F2 0F
    ++FinalSize;
    Need0FPrefix = true;
    break;
  case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
  case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
    ++FinalSize;
    break; // Two-byte opcode prefix
  default: assert(0 && "Invalid prefix!");
  case 0: break;  // No prefix!
  }

  if (Is64BitMode) {
    // REX prefix
    unsigned REX = X86InstrInfo::determineREX(MI);
    if (REX)
      ++FinalSize;
  }

  // 0x0F escape code must be emitted just before the opcode.
  if (Need0FPrefix)
    ++FinalSize;

  switch (Desc->TSFlags & X86II::Op0Mask) {
  case X86II::T8:  // 0F 38
    ++FinalSize;
    break;
  case X86II::TA:  // 0F 3A
    ++FinalSize;
    break;
  }

  // If this is a two-address instruction, skip one of the register operands.
  unsigned NumOps = Desc->getNumOperands();
  unsigned CurOp = 0;
  if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1)
    CurOp++;
  else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0)
    // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
    --NumOps;

  switch (Desc->TSFlags & X86II::FormMask) {
  default: assert(0 && "Unknown FormMask value in X86 MachineCodeEmitter!");
  case X86II::Pseudo:
    // Remember the current PC offset, this is the PIC relocation
    // base address.
    switch (Opcode) {