bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
  // i16 instructions are longer (0x66 prefix) and potentially slower.
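  // For example, "addw $1, %ax" needs a 0x66 operand-size prefix that
  // "addl $1, %eax" does not, so it is a byte longer and can trigger
  // length-changing-prefix stalls on some microarchitectures.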
  return !(VT1 == MVT::i32 && VT2 == MVT::i16);
}

/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
                                      EVT VT) const {
  // Very little shuffling can be done for 64-bit vectors right now.
  if (VT.getSizeInBits() == 64)
    return isPALIGNRMask(M, VT, Subtarget->hasSSSE3());

  // FIXME: pshufb, blends, shifts.
  return (VT.getVectorNumElements() == 2 ||
          ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
          isMOVLMask(M, VT) ||
          isSHUFPMask(M, VT) ||
          isPSHUFDMask(M, VT) ||
          isPSHUFHWMask(M, VT) ||
          isPSHUFLWMask(M, VT) ||
          isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ||
          isUNPCKLMask(M, VT) ||
          isUNPCKHMask(M, VT) ||
          isUNPCKL_v_undef_Mask(M, VT) ||
          isUNPCKH_v_undef_Mask(M, VT));
}

bool
X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
                                          EVT VT) const {
  unsigned NumElts = VT.getVectorNumElements();
  // FIXME: This collection of masks seems suspect.
  if (NumElts == 2)
    return true;
  if (NumElts == 4 && VT.getSizeInBits() == 128) {
    return (isMOVLMask(Mask, VT)  ||
            isCommutedMOVLMask(Mask, VT, true) ||
            isSHUFPMask(Mask, VT) ||
            isCommutedSHUFPMask(Mask, VT));
  }
  return false;
}

//===----------------------------------------------------------------------===//
//                           X86 Scheduler Hooks
//===----------------------------------------------------------------------===//

// private utility function
MachineBasicBlock *
X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
                                                       MachineBasicBlock *MBB,
                                                       unsigned regOpc,
                                                       unsigned immOpc,
                                                       unsigned LoadOpc,
                                                       unsigned CXchgOpc,
                                                       unsigned notOpc,
                                                       unsigned EAXreg,
                                                       TargetRegisterClass *RC,
                                                       bool invSrc) const {
  // For the atomic bitwise operator, we generate
  //   thisMBB:
  //   newMBB:
  //     ld  t1 = [bitinstr.addr]
  //     op  t2 = t1, [bitinstr.val]
  //     mov EAX = t1
  //     lcs dest = [bitinstr.addr], t2  [EAX is implicit]
  //     bz  newMBB
  //     fallthrough -->nextMBB
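  //
  // As a concrete sketch (an illustrative i32 atomic AND; real registers
  // are assigned later by the register allocator):
  //   .LnewMBB:
  //     movl  (%rdi), %eax          # t1 = load
  //     movl  %eax, %ecx
  //     andl  %esi, %ecx            # t2 = t1 & val
  //     lock cmpxchgl %ecx, (%rdi)  # store t2 iff (%rdi) still equals %eax
  //     jne   .LnewMBB              # another thread won the race; retry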
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction::iterator MBBIter = MBB;
  ++MBBIter;

  /// First build the CFG
  MachineFunction *F = MBB->getParent();
  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(MBBIter, newMBB);
  F->insert(MBBIter, nextMBB);
  // Transfer the remainder of thisMBB and its successor edges to nextMBB.
  nextMBB->splice(nextMBB->begin(), thisMBB,
                  llvm::next(MachineBasicBlock::iterator(bInstr)),
                  thisMBB->end());
  nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
  // Update thisMBB to fall through to newMBB
  thisMBB->addSuccessor(newMBB);
  // newMBB jumps to itself and falls through to nextMBB
  newMBB->addSuccessor(nextMBB);
  newMBB->addSuccessor(newMBB);
  // Insert instructions into newMBB based on incoming instruction
  assert(bInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
         "unexpected number of operands");
  DebugLoc dl = bInstr->getDebugLoc();
  MachineOperand& destOper = bInstr->getOperand(0);
  MachineOperand* argOpers[2 + X86::AddrNumOperands];
  int numArgs = bInstr->getNumOperands() - 1;
  for (int i=0; i < numArgs; ++i)
    argOpers[i] = &bInstr->getOperand(i+1);

  // x86 address has 5 operands: base, scale, index, displacement, and segment.
  int lastAddrIndx = X86::AddrNumOperands - 1; // [0,4]
  int valArgIndx = lastAddrIndx + 1;
  unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
  MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(LoadOpc), t1);
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);
  unsigned tt = F->getRegInfo().createVirtualRegister(RC);
  if (invSrc) {
    MIB = BuildMI(newMBB, dl, TII->get(notOpc), tt).addReg(t1);
  }
  else
    tt = t1;

  unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
  assert((argOpers[valArgIndx]->isReg() ||
          argOpers[valArgIndx]->isImm()) &&
         "invalid operand");
  if (argOpers[valArgIndx]->isReg())
    MIB = BuildMI(newMBB, dl, TII->get(regOpc), t2);
  else
    MIB = BuildMI(newMBB, dl, TII->get(immOpc), t2);
  MIB.addReg(tt);
  (*MIB).addOperand(*argOpers[valArgIndx]);
  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg);
  MIB.addReg(t1);
  MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);
  MIB.addReg(t2);
  assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
  (*MIB).setMemRefs(bInstr->memoperands_begin(),
                    bInstr->memoperands_end());
  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
  MIB.addReg(EAXreg);
  BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
  bInstr->eraseFromParent();   // The pseudo instruction is gone now.
  return nextMBB;
}

// private utility function: 64-bit atomics on a 32-bit host.
MachineBasicBlock *
X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
                                                       MachineBasicBlock *MBB,
                                                       unsigned regOpcL,
                                                       unsigned regOpcH,
                                                       unsigned immOpcL,
                                                       unsigned immOpcH,
                                                       bool invSrc) const {
  // For the atomic bitwise operator, we generate
  //   thisMBB (instructions are in pairs, except cmpxchg8b)
  //     ld t1,t2 = [bitinstr.addr]
  //   newMBB:
  //     out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4)
  //     op  t5, t6 <- out1, out2, [bitinstr.val]
  //      (for SWAP, substitute:  mov t5, t6 <- [bitinstr.val])
  //     mov ECX, EBX <- t5, t6
  //     mov EAX, EDX <- t1, t2
  //     cmpxchg8b [bitinstr.addr]  [EAX, EDX, EBX, ECX implicit]
  //     mov t3, t4 <- EAX, EDX
  //     bz  newMBB
  //     result in out1, out2
  //     fallthrough -->nextMBB
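  //
  //   A sketch of why this works: cmpxchg8b compares the 8-byte memory
  //   operand against EDX:EAX; on a match it stores ECX:EBX atomically,
  //   otherwise it loads the current memory value into EDX:EAX, so each
  //   retry of the loop runs with a fresh snapshot of memory.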

  const TargetRegisterClass *RC = X86::GR32RegisterClass;
  const unsigned LoadOpc = X86::MOV32rm;
  const unsigned NotOpc = X86::NOT32r;
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction::iterator MBBIter = MBB;
  ++MBBIter;
  /// First build the CFG
  MachineFunction *F = MBB->getParent();
  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(MBBIter, newMBB);
  F->insert(MBBIter, nextMBB);
  // Transfer the remainder of thisMBB and its successor edges to nextMBB.
  nextMBB->splice(nextMBB->begin(), thisMBB,
                  llvm::next(MachineBasicBlock::iterator(bInstr)),
                  thisMBB->end());
  nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
  // Update thisMBB to fall through to newMBB
  thisMBB->addSuccessor(newMBB);
  // newMBB jumps to itself and falls through to nextMBB
  newMBB->addSuccessor(nextMBB);
  newMBB->addSuccessor(newMBB);
  DebugLoc dl = bInstr->getDebugLoc();
  // Insert instructions into newMBB based on incoming instruction
  // There are 8 "real" operands plus 9 implicit def/uses, ignored here.
  assert(bInstr->getNumOperands() < X86::AddrNumOperands + 14 &&
         "unexpected number of operands");
  MachineOperand& dest1Oper = bInstr->getOperand(0);
  MachineOperand& dest2Oper = bInstr->getOperand(1);
  MachineOperand* argOpers[2 + X86::AddrNumOperands];
  for (int i=0; i < 2 + X86::AddrNumOperands; ++i) {
    argOpers[i] = &bInstr->getOperand(i+2);

    // We use some of the operands multiple times, so conservatively just
    // clear any kill flags that might be present.
    if (argOpers[i]->isReg() && argOpers[i]->isUse())
      argOpers[i]->setIsKill(false);
  }

  // x86 address has 5 operands: base, scale, index, displacement, and segment.
  int lastAddrIndx = X86::AddrNumOperands - 1; // [0,4]
  unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
  MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1);
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);
  unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
  MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t2);
  // add 4 to displacement.
  for (int i=0; i <= lastAddrIndx-2; ++i)
    (*MIB).addOperand(*argOpers[i]);
  MachineOperand newOp3 = *(argOpers[3]);
  if (newOp3.isImm())
    newOp3.setImm(newOp3.getImm()+4);
  else
    newOp3.setOffset(newOp3.getOffset()+4);
  (*MIB).addOperand(newOp3);
  (*MIB).addOperand(*argOpers[lastAddrIndx]);

  // t3/4 are defined later, at the bottom of the loop
  unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
  unsigned t4 = F->getRegInfo().createVirtualRegister(RC);
  BuildMI(newMBB, dl, TII->get(X86::PHI), dest1Oper.getReg())
    .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB);
  BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg())
    .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);

  // The subsequent operations should be using the destination registers of
  // the PHI instructions.
  if (invSrc) {
    t1 = F->getRegInfo().createVirtualRegister(RC);
    t2 = F->getRegInfo().createVirtualRegister(RC);
    MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t1).addReg(dest1Oper.getReg());
    MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t2).addReg(dest2Oper.getReg());
  } else {
    t1 = dest1Oper.getReg();
    t2 = dest2Oper.getReg();
  }

  int valArgIndx = lastAddrIndx + 1;
  assert((argOpers[valArgIndx]->isReg() ||
          argOpers[valArgIndx]->isImm()) &&
         "invalid operand");
  unsigned t5 = F->getRegInfo().createVirtualRegister(RC);
  unsigned t6 = F->getRegInfo().createVirtualRegister(RC);
  if (argOpers[valArgIndx]->isReg())
    MIB = BuildMI(newMBB, dl, TII->get(regOpcL), t5);
  else
    MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5);
  if (regOpcL != X86::MOV32rr)
    MIB.addReg(t1);
  (*MIB).addOperand(*argOpers[valArgIndx]);
  assert(argOpers[valArgIndx + 1]->isReg() ==
         argOpers[valArgIndx]->isReg());
  assert(argOpers[valArgIndx + 1]->isImm() ==
         argOpers[valArgIndx]->isImm());
  if (argOpers[valArgIndx + 1]->isReg())
    MIB = BuildMI(newMBB, dl, TII->get(regOpcH), t6);
  else
    MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6);
  if (regOpcH != X86::MOV32rr)
    MIB.addReg(t2);
  (*MIB).addOperand(*argOpers[valArgIndx + 1]);
  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
  MIB.addReg(t1);
  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX);
  MIB.addReg(t2);
  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX);
  MIB.addReg(t5);
  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX);
  MIB.addReg(t6);
  MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);

  assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
  (*MIB).setMemRefs(bInstr->memoperands_begin(),
                    bInstr->memoperands_end());
  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3);
  MIB.addReg(X86::EAX);
  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4);
  MIB.addReg(X86::EDX);
  BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
  bInstr->eraseFromParent();   // The pseudo instruction is gone now.
  return nextMBB;
}

// private utility function
MachineBasicBlock *
X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
                                                      MachineBasicBlock *MBB,
                                                      unsigned cmovOpc) const {
  // For the atomic min/max operator, we generate
  //   thisMBB:
  //   newMBB:
  //     ld t1 = [min/max.addr]
  //     cmp  t1, t2
  //     cmov[cond] t2 = t1
  //     mov EAX = t1
  //     lcs dest = [bitinstr.addr], t2  [EAX is implicit]
  //     bz   newMBB
  //     fallthrough -->nextMBB
  //
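  // For illustration: an atomic signed max would pass a CMOV opcode (e.g.
  // a CMOVL variant; the exact cmovOpc is supplied by the caller) so that,
  // after "cmp t1, t2", the cmov leaves the winning value in a temporary,
  // and the lock'ed cmpxchg publishes it only if memory still holds t1.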
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction::iterator MBBIter = MBB;
  ++MBBIter;

  /// First build the CFG
  MachineFunction *F = MBB->getParent();
  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(MBBIter, newMBB);
  F->insert(MBBIter, nextMBB);
  // Transfer the remainder of thisMBB and its successor edges to nextMBB.
  nextMBB->splice(nextMBB->begin(), thisMBB,
                  llvm::next(MachineBasicBlock::iterator(mInstr)),
                  thisMBB->end());
  nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
  // Update thisMBB to fall through to newMBB
  thisMBB->addSuccessor(newMBB);
  // newMBB jumps to itself and falls through to nextMBB
  newMBB->addSuccessor(nextMBB);
  newMBB->addSuccessor(newMBB);
  DebugLoc dl = mInstr->getDebugLoc();
  // Insert instructions into newMBB based on incoming instruction
  assert(mInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
         "unexpected number of operands");
  MachineOperand& destOper = mInstr->getOperand(0);
  MachineOperand* argOpers[2 + X86::AddrNumOperands];
  int numArgs = mInstr->getNumOperands() - 1;
  for (int i=0; i < numArgs; ++i)
    argOpers[i] = &mInstr->getOperand(i+1);
  // x86 address has 5 operands: base, scale, index, displacement, and segment.
  int lastAddrIndx = X86::AddrNumOperands - 1; // [0,4]
  int valArgIndx = lastAddrIndx + 1;
  unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
  MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rm), t1);
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);
  // We only support register and immediate values
  assert((argOpers[valArgIndx]->isReg() ||
          argOpers[valArgIndx]->isImm()) &&
         "invalid operand");

  unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
  if (argOpers[valArgIndx]->isReg())
    MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2);
  else
    MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
  (*MIB).addOperand(*argOpers[valArgIndx]);

  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
  MIB.addReg(t1);
  MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr));
  MIB.addReg(t1);
  MIB.addReg(t2);

  // Generate the conditional move (cmov)
  unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
  MIB = BuildMI(newMBB, dl, TII->get(cmovOpc), t3);
  MIB.addReg(t2);
  MIB.addReg(t1);

  // Compare and exchange if no one has modified the memory location
  MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG32));
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);
  MIB.addReg(t3);
  assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperand");
  (*MIB).setMemRefs(mInstr->memoperands_begin(),
                    mInstr->memoperands_end());
  MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
  MIB.addReg(X86::EAX);
  BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
  mInstr->eraseFromParent();   // The pseudo instruction is gone now.
  return nextMBB;
}

// FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8
// or XMM0_V32I8 in AVX all of this code can be replaced with that
// in the .td file.
MachineBasicBlock *
X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
                            unsigned numArgs, bool memArg) const {
  assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) &&
         "Target must have SSE4.2 or AVX features enabled");

  DebugLoc dl = MI->getDebugLoc();
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  unsigned Opc;
  if (!Subtarget->hasAVX()) {
    if (memArg)
      Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
    else
      Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
  } else {
    if (memArg)
      Opc = numArgs == 3 ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm;
    else
      Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr;
  }
  MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
  for (unsigned i = 0; i < numArgs; ++i) {
    MachineOperand &Op = MI->getOperand(i+1);
    if (!(Op.isReg() && Op.isImplicit()))
      MIB.addOperand(Op);
  }
  BuildMI(*BB, MI, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
    .addReg(X86::XMM0);

  MI->eraseFromParent();
  return BB;
}

MachineBasicBlock *
X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const {
  DebugLoc dl = MI->getDebugLoc();
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  // Address into RAX/EAX, other two args into ECX, EDX.
  unsigned MemOpc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
  unsigned MemReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
  MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(MemOpc), MemReg);
  for (int i = 0; i < X86::AddrNumOperands; ++i)
    MIB.addOperand(MI->getOperand(i));

  unsigned ValOps = X86::AddrNumOperands;
  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
    .addReg(MI->getOperand(ValOps).getReg());
  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EDX)
    .addReg(MI->getOperand(ValOps+1).getReg());

  // The instruction doesn't actually take any operands though.
  BuildMI(*BB, MI, dl, TII->get(X86::MONITORrrr));

  MI->eraseFromParent(); // The pseudo is gone now.
  return BB;
}

MachineBasicBlock *
X86TargetLowering::EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const {
  DebugLoc dl = MI->getDebugLoc();
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  // First arg in ECX, the second in EAX.
  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::ECX)
    .addReg(MI->getOperand(0).getReg());
  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), X86::EAX)
    .addReg(MI->getOperand(1).getReg());

  // The instruction doesn't actually take any operands though.
  BuildMI(*BB, MI, dl, TII->get(X86::MWAITrr));

  MI->eraseFromParent(); // The pseudo is gone now.
  return BB;
}

MachineBasicBlock *
X86TargetLowering::EmitVAARG64WithCustomInserter(
                   MachineInstr *MI,
                   MachineBasicBlock *MBB) const {
  // Emit va_arg instruction on X86-64.

  // Operands to this pseudo-instruction:
  // 0  ) Output        : destination address (reg)
  // 1-5) Input         : va_list address (addr, i64mem)
  // 6  ) ArgSize       : Size (in bytes) of vararg type
  // 7  ) ArgMode       : 0=overflow only, 1=use gp_offset, 2=use fp_offset
  // 8  ) Align         : Alignment of type
  // 9  ) EFLAGS (implicit-def)

  assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
  assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands");

  unsigned DestReg = MI->getOperand(0).getReg();
  MachineOperand &Base = MI->getOperand(1);
  MachineOperand &Scale = MI->getOperand(2);
  MachineOperand &Index = MI->getOperand(3);
  MachineOperand &Disp = MI->getOperand(4);
  MachineOperand &Segment = MI->getOperand(5);
  unsigned ArgSize = MI->getOperand(6).getImm();
  unsigned ArgMode = MI->getOperand(7).getImm();
  unsigned Align = MI->getOperand(8).getImm();

  // Memory Reference
  assert(MI->hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();

  // Machine Information
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
  const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
  DebugLoc DL = MI->getDebugLoc();

  // struct va_list {
  //   i32   gp_offset
  //   i32   fp_offset
  //   i64   overflow_area (address)
  //   i64   reg_save_area (address)
  // }
  // sizeof(va_list) = 24
  // alignment(va_list) = 8
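  //
  // A C-level sketch of what the emitted code computes (gp_offset case;
  // names mirror the struct above, constants from the SysV AMD64 ABI):
  //   if (gp_offset + arg_size <= 6*8) {          // room in the save area?
  //     addr = reg_save_area + gp_offset;
  //     gp_offset += 8;
  //   } else {
  //     addr = align(overflow_area, max(align, 8));
  //     overflow_area = addr + roundup(arg_size, 8);
  //   }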

  unsigned TotalNumIntRegs = 6;
  unsigned TotalNumXMMRegs = 8;
  bool UseGPOffset = (ArgMode == 1);
  bool UseFPOffset = (ArgMode == 2);
  unsigned MaxOffset = TotalNumIntRegs * 8 +
                       (UseFPOffset ? TotalNumXMMRegs * 16 : 0);

  // Align ArgSize to a multiple of 8.
  unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
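  // e.g. ArgSize == 12 rounds up to ArgSizeA8 == 16.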
  bool NeedsAlign = (Align > 8);

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *overflowMBB;
  MachineBasicBlock *offsetMBB;
  MachineBasicBlock *endMBB;

  unsigned OffsetDestReg = 0;    // Argument address computed by offsetMBB
  unsigned OverflowDestReg = 0;  // Argument address computed by overflowMBB
  unsigned OffsetReg = 0;

  if (!UseGPOffset && !UseFPOffset) {
    // If we only pull from the overflow region, we don't create a branch.
    // We don't need to alter control flow.
    OffsetDestReg = 0; // unused
    OverflowDestReg = DestReg;

    offsetMBB = NULL;
    overflowMBB = thisMBB;
    endMBB = thisMBB;
  } else {
    // First emit code to check if gp_offset (or fp_offset) is below the bound.
    // If so, pull the argument from reg_save_area. (branch to offsetMBB)
    // If not, pull from overflow_area. (branch to overflowMBB)
    //
    //       thisMBB
    //         |     .
    //         |        .
    //     offsetMBB   overflowMBB
    //         |        .
    //         |     .
    //        endMBB

    // Registers for the PHI in endMBB
    OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
    OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);

    const BasicBlock *LLVM_BB = MBB->getBasicBlock();
    MachineFunction *MF = MBB->getParent();
    overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
    offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
    endMBB = MF->CreateMachineBasicBlock(LLVM_BB);

    MachineFunction::iterator MBBIter = MBB;
    ++MBBIter;

    // Insert the new basic blocks
    MF->insert(MBBIter, offsetMBB);
    MF->insert(MBBIter, overflowMBB);
    MF->insert(MBBIter, endMBB);

    // Transfer the remainder of MBB and its successor edges to endMBB.
    endMBB->splice(endMBB->begin(), thisMBB,
                    llvm::next(MachineBasicBlock::iterator(MI)),
                    thisMBB->end());
    endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);

    // Make offsetMBB and overflowMBB successors of thisMBB
    thisMBB->addSuccessor(offsetMBB);
    thisMBB->addSuccessor(overflowMBB);

    // endMBB is a successor of both offsetMBB and overflowMBB
    offsetMBB->addSuccessor(endMBB);
    overflowMBB->addSuccessor(endMBB);

    // Load the offset value into a register
    OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
    BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
      .addOperand(Base)
      .addOperand(Scale)
      .addOperand(Index)
      .addDisp(Disp, UseFPOffset ? 4 : 0)
      .addOperand(Segment)
      .setMemRefs(MMOBegin, MMOEnd);

    // Check if there is enough room left to pull this argument.
    BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
      .addReg(OffsetReg)
      .addImm(MaxOffset + 8 - ArgSizeA8);

    // Branch to "overflowMBB" if offset >= max
    // Fall through to "offsetMBB" otherwise
    BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE)))
      .addMBB(overflowMBB);
  }

  // In offsetMBB, emit code to use the reg_save_area.
  if (offsetMBB) {
    assert(OffsetReg != 0);

    // Read the reg_save_area address.
    unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
    BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
      .addOperand(Base)
      .addOperand(Scale)
      .addOperand(Index)
      .addDisp(Disp, 16)
      .addOperand(Segment)
      .setMemRefs(MMOBegin, MMOEnd);

    // Zero-extend the offset
    unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
    BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
      .addImm(0)
      .addReg(OffsetReg)
      .addImm(X86::sub_32bit);

    // Add the offset to the reg_save_area to get the final address.
    BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
      .addReg(OffsetReg64)
      .addReg(RegSaveReg);

    // Compute the offset for the next argument
    unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
    BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
      .addReg(OffsetReg)
      .addImm(UseFPOffset ? 16 : 8);

    // Store it back into the va_list.
    BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
      .addOperand(Base)
      .addOperand(Scale)
      .addOperand(Index)
      .addDisp(Disp, UseFPOffset ? 4 : 0)
      .addOperand(Segment)
      .addReg(NextOffsetReg)
      .setMemRefs(MMOBegin, MMOEnd);

    // Jump to endMBB
    BuildMI(offsetMBB, DL, TII->get(X86::JMP_4))
      .addMBB(endMBB);
  }

  //
  // Emit code to use overflow area
  //

  // Load the overflow_area address into a register.
  unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
  BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
    .addOperand(Base)
    .addOperand(Scale)
    .addOperand(Index)
    .addDisp(Disp, 8)
    .addOperand(Segment)
    .setMemRefs(MMOBegin, MMOEnd);

  // If we need to align it, do so. Otherwise, just copy the address
  // to OverflowDestReg.
  if (NeedsAlign) {
    // Align the overflow address
    assert((Align & (Align-1)) == 0 && "Alignment must be a power of 2");
    unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass);

    // aligned_addr = (addr + (align-1)) & ~(align-1)
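    // e.g. addr == 0x1003 with align == 16: (0x1003 + 15) & ~15 == 0x1010.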
    BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
      .addReg(OverflowAddrReg)
      .addImm(Align-1);

    BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg)
      .addReg(TmpReg)
      .addImm(~(uint64_t)(Align-1));
  } else {
    BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
      .addReg(OverflowAddrReg);
  }

  // Compute the next overflow address after this argument.
  // (the overflow address should be kept 8-byte aligned)
  unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
  BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg)
    .addReg(OverflowDestReg)
    .addImm(ArgSizeA8);

  // Store the new overflow address.
  BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
    .addOperand(Base)
    .addOperand(Scale)
    .addOperand(Index)
    .addDisp(Disp, 8)
    .addOperand(Segment)
    .addReg(NextAddrReg)
    .setMemRefs(MMOBegin, MMOEnd);

  // If we branched, emit the PHI to the front of endMBB.
  if (offsetMBB) {
    BuildMI(*endMBB, endMBB->begin(), DL,
            TII->get(X86::PHI), DestReg)
      .addReg(OffsetDestReg).addMBB(offsetMBB)
      .addReg(OverflowDestReg).addMBB(overflowMBB);
  }

  // Erase the pseudo instruction
  MI->eraseFromParent();

  return endMBB;
}

MachineBasicBlock *
X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
                                                 MachineInstr *MI,
                                                 MachineBasicBlock *MBB) const {
  // Emit code to save XMM registers to the stack. The ABI says that the
  // number of registers to save is given in %al, so it's theoretically
  // possible to do an indirect jump trick to avoid saving all of them,
  // however this code takes a simpler approach and just executes all
  // of the stores if %al is non-zero. It's less code, and it's probably
  // easier on the hardware branch predictor, and stores aren't all that
  // expensive anyway.
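  //
  // The resulting block structure is a simple conditional skip (sketch):
  //   MBB:         testb %al, %al ; je EndMBB     (test skipped on Win64)
  //   XMMSaveMBB:  movaps %xmmN, offset(reg_save_area)  for each arg reg
  //   EndMBB:      fall-through continues here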

  // Create the new basic blocks. One block contains all the XMM stores,
  // and one block is the final destination regardless of whether any
  // stores were performed.
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction *F = MBB->getParent();
  MachineFunction::iterator MBBIter = MBB;
  ++MBBIter;
  MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(MBBIter, XMMSaveMBB);
  F->insert(MBBIter, EndMBB);

  // Transfer the remainder of MBB and its successor edges to EndMBB.
  EndMBB->splice(EndMBB->begin(), MBB,
                 llvm::next(MachineBasicBlock::iterator(MI)),
                 MBB->end());
  EndMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // The original block will now fall through to the XMM save block.
  MBB->addSuccessor(XMMSaveMBB);
  // The XMMSaveMBB will fall through to the end block.
  XMMSaveMBB->addSuccessor(EndMBB);

  // Now add the instructions.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();

  unsigned CountReg = MI->getOperand(0).getReg();
  int64_t RegSaveFrameIndex = MI->getOperand(1).getImm();
  int64_t VarArgsFPOffset = MI->getOperand(2).getImm();

  if (!Subtarget->isTargetWin64()) {
    // If %al is 0, branch around the XMM save block.
    BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
    BuildMI(MBB, DL, TII->get(X86::JE_4)).addMBB(EndMBB);
    MBB->addSuccessor(EndMBB);
  }

  // In the XMM save block, save all the XMM argument registers.
  for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
    int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
    MachineMemOperand *MMO =
      F->getMachineMemOperand(
          MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset),
          MachineMemOperand::MOStore,
          /*Size=*/16, /*Align=*/16);
    BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr))
      .addFrameIndex(RegSaveFrameIndex)
      .addImm(/*Scale=*/1)
      .addReg(/*IndexReg=*/0)
      .addImm(/*Disp=*/Offset)
      .addReg(/*Segment=*/0)
      .addReg(MI->getOperand(i).getReg())
      .addMemOperand(MMO);
  }

  MI->eraseFromParent();   // The pseudo instruction is gone now.

  return EndMBB;
}

MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
                                     MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();
  // To "insert" a SELECT_CC instruction, we actually have to insert the
  // diamond control-flow pattern.  The incoming instruction knows the
  // destination vreg to set, the condition code register to branch on, the
  // true/false values to select between, and a branch opcode to use.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = BB;
  ++It;
  //  thisMBB:
  //  ...
  //   TrueVal = ...
  //   cmpTY ccX, r1, r2
  //   bCC copy1MBB
  //   fallthrough --> copy0MBB
  MachineBasicBlock *thisMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, copy0MBB);
  F->insert(It, sinkMBB);

  // If the EFLAGS register isn't dead in the terminator, then claim that it's
  // live into the sink and copy blocks.
  const MachineFunction *MF = BB->getParent();
  const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
  BitVector ReservedRegs = TRI->getReservedRegs(*MF);

  for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
    const MachineOperand &MO = MI->getOperand(I);
    if (!MO.isReg() || !MO.isUse() || MO.isKill()) continue;
    unsigned Reg = MO.getReg();
    if (Reg != X86::EFLAGS) continue;
    copy0MBB->addLiveIn(Reg);
    sinkMBB->addLiveIn(Reg);
  }

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), BB,
                  llvm::next(MachineBasicBlock::iterator(MI)),
                  BB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

  // Add the true and fallthrough blocks as its successors.
  BB->addSuccessor(copy0MBB);
  BB->addSuccessor(sinkMBB);

  // Create the conditional branch instruction.
  unsigned Opc =
    X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
  BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);

  //  copy0MBB:
  //   %FalseValue = ...
  //   # fallthrough to sinkMBB
  //  sinkMBB:
  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
  //  ...
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(X86::PHI), MI->getOperand(0).getReg())
    .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
    .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return sinkMBB;
}

MachineBasicBlock *
X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
                                        MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();

  // The lowering is pretty easy: we're just emitting the call to _alloca.  The
  // non-trivial part is impdef of ESP.
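  // Sketch of the 32-bit path emitted below (assuming the usual probe
  // convention): earlier lowering puts the allocation size in EAX, the
  // call touches each new stack page, and ESP/EAX/EFLAGS are modeled as
  // implicit defs because the probe routine clobbers them.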

  if (Subtarget->isTargetWin64()) {
    if (Subtarget->isTargetCygMing()) {
      // ___chkstk(Mingw64):
      // Clobbers R10, R11, RAX and EFLAGS.
      // Updates RSP.
      BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
        .addExternalSymbol("___chkstk")
        .addReg(X86::RAX, RegState::Implicit)
        .addReg(X86::RSP, RegState::Implicit)
        .addReg(X86::RAX, RegState::Define | RegState::Implicit)
        .addReg(X86::RSP, RegState::Define | RegState::Implicit)
        .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
    } else {
      // __chkstk(MSVCRT): does not update stack pointer.
      // Clobbers R10, R11 and EFLAGS.
      // FIXME: RAX(allocated size) might be reused and not killed.
      BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
        .addExternalSymbol("__chkstk")
        .addReg(X86::RAX, RegState::Implicit)
        .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
      // RAX has the offset to be subtracted from RSP.
      BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP)
        .addReg(X86::RSP)
        .addReg(X86::RAX);
    }
  } else {
    const char *StackProbeSymbol =
      Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";

    BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
      .addExternalSymbol(StackProbeSymbol)
      .addReg(X86::EAX, RegState::Implicit)
      .addReg(X86::ESP, RegState::Implicit)
      .addReg(X86::EAX, RegState::Define | RegState::Implicit)
      .addReg(X86::ESP, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
  }
  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}

MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
                                      MachineBasicBlock *BB) const {
  // This is pretty easy.  We're taking the value that we received from
  // our load from the relocation, sticking it in either RDI (x86-64)
  // or EAX and doing an indirect call.  The return value will then
  // be in the normal return register.
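  //
  // Sketch of the sequence this emits for x86-64 (Darwin's TLV scheme):
  //   movq  _var@TLVP(%rip), %rdi   # descriptor address from the reloc
  //   callq *(%rdi)                 # returns the variable address in %rax
  // ("_var" is illustrative; the global comes from operand 3 below.)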
  const X86InstrInfo *TII
    = static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo());
  DebugLoc DL = MI->getDebugLoc();
  MachineFunction *F = BB->getParent();

  assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
  assert(MI->getOperand(3).isGlobal() && "This should be a global");
  if (Subtarget->is64Bit()) {
    MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
                                      TII->get(X86::MOV64rm), X86::RDI)
    .addReg(X86::RIP)
    .addImm(0).addReg(0)
    .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
                      MI->getOperand(3).getTargetFlags())
    .addReg(0);
    MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
    addDirectMem(MIB, X86::RDI);
  } else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
    MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
                                      TII->get(X86::MOV32rm), X86::EAX)
    .addReg(0)
    .addImm(0).addReg(0)
    .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
                      MI->getOperand(3).getTargetFlags())
    .addReg(0);
    MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
    addDirectMem(MIB, X86::EAX);
  } else {
    MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,