Newer
Older
Owen Anderson
committed
/// isNarrowingProfitable - Return true if narrowing an operation from type
/// VT1 to type VT2 is considered profitable on x86. Every combination is
/// accepted except i32 -> i16.
bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
  // i16 instructions are longer (0x66 prefix) and potentially slower.
  return !(VT1 == MVT::i32 && VT2 == MVT::i16);
}
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
///
/// \param M   the shuffle mask to test.
/// \param VT  the vector type being shuffled.
/// \returns true if the mask matches one of the patterns checked below.
bool
X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
                                      EVT VT) const {
  // Very little shuffling can be done for 64-bit vectors right now.
  if (VT.getSizeInBits() == 64)
    return isPALIGNRMask(M, VT, Subtarget->hasSSSE3());

  // FIXME: pshufb, blends, shifts.
  // Any two-element shuffle is accepted; wider vectors must match one of the
  // recognised mask patterns.
  return (VT.getVectorNumElements() == 2 ||
          ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
          isMOVLMask(M, VT) ||
          isSHUFPMask(M, VT) ||
          isPSHUFDMask(M, VT) ||
          isPSHUFHWMask(M, VT) ||
          isPSHUFLWMask(M, VT) ||
          isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ||
          isUNPCKLMask(M, VT) ||
          isUNPCKHMask(M, VT) ||
          isUNPCKL_v_undef_Mask(M, VT) ||
          isUNPCKH_v_undef_Mask(M, VT));
}
X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
Owen Anderson
committed
EVT VT) const {
unsigned NumElts = VT.getVectorNumElements();
// FIXME: This collection of masks seems suspect.
if (NumElts == 2)
return true;
if (NumElts == 4 && VT.getSizeInBits() == 128) {
return (isMOVLMask(Mask, VT) ||
isCommutedMOVLMask(Mask, VT, true) ||
isSHUFPMask(Mask, VT) ||
isCommutedSHUFPMask(Mask, VT));
}
return false;
}
//===----------------------------------------------------------------------===//
// X86 Scheduler Hooks
//===----------------------------------------------------------------------===//
// private utility function
// Expands an atomic bitwise pseudo-instruction (bInstr) into a
// load / operate / compare-exchange loop. Two blocks are created: newMBB
// (the loop body, which branches back to itself) and nextMBB (which receives
// every instruction that followed bInstr plus the original successor edges).
// bInstr is erased and nextMBB is returned.
//
// NOTE(review): this copy of the function looks like it lost lines during
// extraction (unterminated asserts, a use of `immOpc` that is not declared in
// the visible parameter list, builders with no source operand added) and it
// contains blame-view residue. Restore it from the upstream file before
// relying on the text below.
MachineBasicBlock *
X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
MachineBasicBlock *MBB,
unsigned regOpc,
unsigned LoadOpc,
unsigned CXchgOpc,
unsigned notOpc,
unsigned EAXreg,
TargetRegisterClass *RC,
bool invSrc) const {
// For the atomic bitwise operator, we generate
// thisMBB:
// newMBB:
// ld t1 = [bitinstr.addr]
// op t2 = t1, [bitinstr.val]
// mov EAX = t1
// lcs dest = [bitinstr.addr], t2 [EAX is implicit]
// bz newMBB
// fallthrough -->nextMBB
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
// NOTE(review): the sibling 64/32 variant advances this iterator (++MBBIter)
// before inserting; that step is not present here — presumably lost, confirm.
MachineFunction::iterator MBBIter = MBB;
/// First build the CFG
MachineFunction *F = MBB->getParent();
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(MBBIter, newMBB);
F->insert(MBBIter, nextMBB);
// Transfer the remainder of thisMBB and its successor edges to nextMBB.
nextMBB->splice(nextMBB->begin(), thisMBB,
llvm::next(MachineBasicBlock::iterator(bInstr)),
thisMBB->end());
nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
// Update thisMBB to fall through to newMBB
thisMBB->addSuccessor(newMBB);
// newMBB jumps to itself and fall through to nextMBB
newMBB->addSuccessor(nextMBB);
newMBB->addSuccessor(newMBB);
// Insert instructions into newMBB based on incoming instruction
// NOTE(review): the message string and closing parenthesis of this assert are
// missing in this copy.
assert(bInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
DebugLoc dl = bInstr->getDebugLoc();
// Operand 0 is the destination; the remaining operands (address, then value)
// are collected into argOpers.
MachineOperand& destOper = bInstr->getOperand(0);
MachineOperand* argOpers[2 + X86::AddrNumOperands];
int numArgs = bInstr->getNumOperands() - 1;
for (int i=0; i < numArgs; ++i)
argOpers[i] = &bInstr->getOperand(i+1);
// x86 address has 4 operands: base, index, scale, and displacement
int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
int valArgIndx = lastAddrIndx + 1;
// t1 = load from the address operands.
unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(LoadOpc), t1);
for (int i=0; i <= lastAddrIndx; ++i)
(*MIB).addOperand(*argOpers[i]);
// NOTE(review): invSrc is never read in the visible text; the NOT below was
// presumably guarded by it — confirm against upstream.
unsigned tt = F->getRegInfo().createVirtualRegister(RC);
MIB = BuildMI(newMBB, dl, TII->get(notOpc), tt).addReg(t1);
unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
// NOTE(review): this assert is also unterminated in this copy.
assert((argOpers[valArgIndx]->isReg() ||
argOpers[valArgIndx]->isImm()) &&
if (argOpers[valArgIndx]->isReg())
MIB = BuildMI(newMBB, dl, TII->get(regOpc), t2);
// NOTE(review): no `else` precedes the next line and `immOpc` is not a visible
// parameter — both presumably lost.
MIB = BuildMI(newMBB, dl, TII->get(immOpc), t2);
(*MIB).addOperand(*argOpers[valArgIndx]);
// NOTE(review): the next two lines are blame-view residue, not code.
Jakob Stoklund Olesen
committed
// NOTE(review): no source register is added to this COPY in the visible text.
MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg);
// Compare-exchange against the same address, storing t2.
MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
for (int i=0; i <= lastAddrIndx; ++i)
(*MIB).addOperand(*argOpers[i]);
MIB.addReg(t2);
assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
(*MIB).setMemRefs(bInstr->memoperands_begin(),
bInstr->memoperands_end());
// NOTE(review): the next two lines are blame-view residue, not code.
Jakob Stoklund Olesen
committed
// NOTE(review): no source register is added to this COPY in the visible text.
MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
// insert branch
BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
bInstr->eraseFromParent(); // The pseudo instruction is gone now.
return nextMBB;
}
// private utility function: 64 bit atomics on 32 bit host.
// Expands a 64-bit atomic pseudo-instruction (bInstr) for a 32-bit target into
// a loop built around LCMPXCHG8B. The two 32-bit halves are loaded in thisMBB,
// merged with the loop-carried values by PHIs in newMBB, combined with the
// value operands using the supplied low/high opcodes, and exchanged. Code that
// followed bInstr is moved to nextMBB, bInstr is erased, and nextMBB is
// returned.
//
// NOTE(review): this copy of the function looks like it lost lines during
// extraction (unterminated asserts, missing `else` keywords and closing
// braces, a use of loop variable `i` outside any visible loop) and it contains
// blame-view residue. Restore it from the upstream file before relying on the
// text below.
MachineBasicBlock *
X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
MachineBasicBlock *MBB,
unsigned regOpcL,
unsigned regOpcH,
unsigned immOpcL,
unsigned immOpcH,
bool invSrc) const {
// For the atomic bitwise operator, we generate
// thisMBB (instructions are in pairs, except cmpxchg8b)
// ld t1,t2 = [bitinstr.addr]
// newMBB:
// out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4)
// op t5, t6 <- out1, out2, [bitinstr.val]
// (for SWAP, substitute: mov t5, t6 <- [bitinstr.val])
// mov ECX, EBX <- t5, t6
// mov EAX, EDX <- t1, t2
// cmpxchg8b [bitinstr.addr] [EAX, EDX, EBX, ECX implicit]
// mov t3, t4 <- EAX, EDX
// bz newMBB
// result in out1, out2
// fallthrough -->nextMBB
// Everything here operates on 32-bit halves.
const TargetRegisterClass *RC = X86::GR32RegisterClass;
const unsigned LoadOpc = X86::MOV32rm;
const unsigned NotOpc = X86::NOT32r;
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
// New blocks are inserted immediately after MBB in the function layout.
MachineFunction::iterator MBBIter = MBB;
++MBBIter;
/// First build the CFG
MachineFunction *F = MBB->getParent();
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(MBBIter, newMBB);
F->insert(MBBIter, nextMBB);
// Transfer the remainder of thisMBB and its successor edges to nextMBB.
nextMBB->splice(nextMBB->begin(), thisMBB,
llvm::next(MachineBasicBlock::iterator(bInstr)),
thisMBB->end());
nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
// Update thisMBB to fall through to newMBB
thisMBB->addSuccessor(newMBB);
// newMBB jumps to itself and fall through to nextMBB
newMBB->addSuccessor(nextMBB);
newMBB->addSuccessor(newMBB);
DebugLoc dl = bInstr->getDebugLoc();
// Insert instructions into newMBB based on incoming instruction
// There are 8 "real" operands plus 9 implicit def/uses, ignored here.
// NOTE(review): the message string and closing parenthesis of this assert are
// missing in this copy.
assert(bInstr->getNumOperands() < X86::AddrNumOperands + 14 &&
// Operands 0 and 1 are the two destination halves; the address and value
// operands start at index 2.
MachineOperand& dest1Oper = bInstr->getOperand(0);
MachineOperand& dest2Oper = bInstr->getOperand(1);
MachineOperand* argOpers[2 + X86::AddrNumOperands];
for (int i=0; i < 2 + X86::AddrNumOperands; ++i) {
argOpers[i] = &bInstr->getOperand(i+2);
// We use some of the operands multiple times, so conservatively just
// clear any kill flags that might be present.
if (argOpers[i]->isReg() && argOpers[i]->isUse())
argOpers[i]->setIsKill(false);
}
// x86 address has 5 operands: base, index, scale, displacement, and segment.
int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
// t1 = first 32-bit load, emitted in thisMBB (before the loop).
unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1);
for (int i=0; i <= lastAddrIndx; ++i)
(*MIB).addOperand(*argOpers[i]);
// t2 = second 32-bit load, from the same address with the displacement
// operand (argOpers[3]) advanced by 4.
unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t2);
// NOTE(review): `i` is used below outside any visible loop — the enclosing
// `for` was presumably lost; confirm against upstream.
(*MIB).addOperand(*argOpers[i]);
MachineOperand newOp3 = *(argOpers[3]);
if (newOp3.isImm())
newOp3.setImm(newOp3.getImm()+4);
else
newOp3.setOffset(newOp3.getOffset()+4);
(*MIB).addOperand(newOp3);
// t3/4 are defined later, at the bottom of the loop
unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
unsigned t4 = F->getRegInfo().createVirtualRegister(RC);
// The destination registers are defined by PHIs merging the initial loads
// (from thisMBB) with the values produced by the previous iteration (t3/t4).
BuildMI(newMBB, dl, TII->get(X86::PHI), dest1Oper.getReg())
.addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB);
BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg())
.addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);
// NOTE(review): the next two lines are blame-view residue, not code.
Evan Cheng
committed
// The subsequent operations should be using the destination registers of
//the PHI instructions.
if (invSrc) {
// NOTE(review): the next two lines are blame-view residue, not code.
Evan Cheng
committed
t1 = F->getRegInfo().createVirtualRegister(RC);
t2 = F->getRegInfo().createVirtualRegister(RC);
MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t1).addReg(dest1Oper.getReg());
MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t2).addReg(dest2Oper.getReg());
// NOTE(review): the next two lines are blame-view residue; the `} else {`
// that presumably separated the two arms, and its closing brace, are not
// present in this copy.
Evan Cheng
committed
t1 = dest1Oper.getReg();
t2 = dest2Oper.getReg();
int valArgIndx = lastAddrIndx + 1;
// NOTE(review): this assert is missing part of its condition in this copy.
assert((argOpers[valArgIndx]->isReg() ||
"invalid operand");
unsigned t5 = F->getRegInfo().createVirtualRegister(RC);
unsigned t6 = F->getRegInfo().createVirtualRegister(RC);
// Low half: register or immediate form of the operation into t5.
if (argOpers[valArgIndx]->isReg())
MIB = BuildMI(newMBB, dl, TII->get(regOpcL), t5);
// NOTE(review): no `else` precedes the next line in this copy.
MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5);
// A plain move (MOV32rr) takes no first source operand, so t1 is skipped.
if (regOpcL != X86::MOV32rr)
// NOTE(review): the next two lines are blame-view residue, not code.
Evan Cheng
committed
MIB.addReg(t1);
(*MIB).addOperand(*argOpers[valArgIndx]);
// NOTE(review): both asserts below are missing their right-hand sides in this
// copy.
assert(argOpers[valArgIndx + 1]->isReg() ==
assert(argOpers[valArgIndx + 1]->isImm() ==
// High half: same pattern into t6.
if (argOpers[valArgIndx + 1]->isReg())
MIB = BuildMI(newMBB, dl, TII->get(regOpcH), t6);
// NOTE(review): no `else` precedes the next line in this copy.
MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6);
if (regOpcH != X86::MOV32rr)
// NOTE(review): the next two lines are blame-view residue, not code.
Evan Cheng
committed
MIB.addReg(t2);
(*MIB).addOperand(*argOpers[valArgIndx + 1]);
// Copy t1/t2 into EAX/EDX and t5/t6 into EBX/ECX ahead of LCMPXCHG8B.
// NOTE(review): each "Jakob Stoklund Olesen / committed" pair below is
// blame-view residue, not code.
Jakob Stoklund Olesen
committed
MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
MIB.addReg(t1);
Jakob Stoklund Olesen
committed
MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX);
MIB.addReg(t2);
Jakob Stoklund Olesen
committed
MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX);
MIB.addReg(t5);
Jakob Stoklund Olesen
committed
MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX);
MIB.addReg(t6);
MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
for (int i=0; i <= lastAddrIndx; ++i)
(*MIB).addOperand(*argOpers[i]);
assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
(*MIB).setMemRefs(bInstr->memoperands_begin(),
bInstr->memoperands_end());
// Copy EAX/EDX into t3/t4, which feed the PHIs at the top of the loop.
Jakob Stoklund Olesen
committed
MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3);
MIB.addReg(X86::EAX);
Jakob Stoklund Olesen
committed
MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4);
MIB.addReg(X86::EDX);
// insert branch
BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
bInstr->eraseFromParent(); // The pseudo instruction is gone now.
return nextMBB;
}
// private utility function
// Expands an atomic min/max pseudo-instruction (mInstr) into a
// load / compare / conditional-move / LCMPXCHG32 loop using 32-bit registers.
// cmovOpc selects the conditional move that implements the particular
// min/max flavour. Code that followed mInstr is moved to nextMBB, mInstr is
// erased, and nextMBB is returned.
//
// NOTE(review): this copy of the function looks like it lost lines during
// extraction (unterminated asserts, a missing `else`, a COPY with no source
// operand) and it contains blame-view residue. Restore it from the upstream
// file before relying on the text below.
MachineBasicBlock *
X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
MachineBasicBlock *MBB,
unsigned cmovOpc) const {
// For the atomic min/max operator, we generate
// thisMBB:
// newMBB:
// mov t2 = [min/max.val]
// cmp t1, t2
// cmov[cond] t2 = t1
// lcs dest = [bitinstr.addr], t2 [EAX is implicit]
// bz newMBB
// fallthrough -->nextMBB
//
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
// NOTE(review): the sibling 64/32 variant advances this iterator (++MBBIter)
// before inserting; that step is not present here — presumably lost, confirm.
MachineFunction::iterator MBBIter = MBB;
/// First build the CFG
MachineFunction *F = MBB->getParent();
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(MBBIter, newMBB);
F->insert(MBBIter, nextMBB);
// Transfer the remainder of thisMBB and its successor edges to nextMBB.
nextMBB->splice(nextMBB->begin(), thisMBB,
llvm::next(MachineBasicBlock::iterator(mInstr)),
thisMBB->end());
nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
// Update thisMBB to fall through to newMBB
thisMBB->addSuccessor(newMBB);
// newMBB jumps to newMBB and fall through to nextMBB
newMBB->addSuccessor(nextMBB);
newMBB->addSuccessor(newMBB);
DebugLoc dl = mInstr->getDebugLoc();
// Insert instructions into newMBB based on incoming instruction
// NOTE(review): the message string and closing parenthesis of this assert are
// missing in this copy.
assert(mInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
// Operand 0 is the destination; the remaining operands (address, then value)
// are collected into argOpers.
MachineOperand& destOper = mInstr->getOperand(0);
MachineOperand* argOpers[2 + X86::AddrNumOperands];
int numArgs = mInstr->getNumOperands() - 1;
for (int i=0; i < numArgs; ++i)
argOpers[i] = &mInstr->getOperand(i+1);
// x86 address has 4 operands: base, index, scale, and displacement
int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
int valArgIndx = lastAddrIndx + 1;
// t1 = 32-bit load from the address operands.
unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rm), t1);
for (int i=0; i <= lastAddrIndx; ++i)
(*MIB).addOperand(*argOpers[i]);
// We only support register and immediate values
// NOTE(review): this assert is also unterminated in this copy.
assert((argOpers[valArgIndx]->isReg() ||
argOpers[valArgIndx]->isImm()) &&
// t2 = the min/max operand value.
unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
if (argOpers[valArgIndx]->isReg())
// NOTE(review): the next two lines are blame-view residue, not code.
Jakob Stoklund Olesen
committed
MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2);
// NOTE(review): no `else` precedes the next line in this copy.
MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
(*MIB).addOperand(*argOpers[valArgIndx]);
// NOTE(review): the next two lines are blame-view residue, not code.
Jakob Stoklund Olesen
committed
// NOTE(review): no source register is added to this COPY in the visible text.
MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr));
MIB.addReg(t1);
MIB.addReg(t2);
// Generate movc
unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
MIB = BuildMI(newMBB, dl, TII->get(cmovOpc),t3);
MIB.addReg(t2);
MIB.addReg(t1);
// Cmp and exchange if none has modified the memory location
MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG32));
for (int i=0; i <= lastAddrIndx; ++i)
(*MIB).addOperand(*argOpers[i]);
MIB.addReg(t3);
assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperand");
(*MIB).setMemRefs(mInstr->memoperands_begin(),
mInstr->memoperands_end());
// NOTE(review): the next two lines are blame-view residue, not code.
Jakob Stoklund Olesen
committed
// The destination register receives EAX.
MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
MIB.addReg(X86::EAX);
// insert branch
BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
mInstr->eraseFromParent(); // The pseudo instruction is gone now.
return nextMBB;
}
// FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8
Bruno Cardoso Lopes
committed
// or XMM0_V32I8 in AVX all of this code can be replaced with that
// in the .td file.
MachineBasicBlock *
X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
unsigned numArgs, bool memArg) const {
Bruno Cardoso Lopes
committed
assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) &&
"Target must have SSE4.2 or AVX features enabled");
DebugLoc dl = MI->getDebugLoc();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
unsigned Opc;
Bruno Cardoso Lopes
committed
if (!Subtarget->hasAVX()) {
if (memArg)
Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
else
Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
} else {
if (memArg)
Opc = numArgs == 3 ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm;
else
Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr;
}
MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(Opc));
for (unsigned i = 0; i < numArgs; ++i) {
MachineOperand &Op = MI->getOperand(i+1);
if (!(Op.isReg() && Op.isImplicit()))
MIB.addOperand(Op);
}
BuildMI(*BB, MI, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
.addReg(X86::XMM0);
MI->eraseFromParent();
return BB;
}
/// EmitMonitor - Expand the MONITOR pseudo-instruction. The memory address
/// described by the pseudo's leading address operands is materialised with an
/// LEA into RAX (64-bit) or EAX (32-bit), the two operands that follow are
/// copied into ECX and EDX, and the operand-less MONITORrrr is emitted.
/// All new code is inserted before MI, which is then erased.
/// \returns BB (control flow is not changed).
MachineBasicBlock *
X86TargetLowering::EmitMonitor(MachineInstr *MI, MachineBasicBlock *BB) const {
  const TargetInstrInfo *InstrInfo = getTargetMachine().getInstrInfo();
  DebugLoc Loc = MI->getDebugLoc();

  // Address into RAX/EAX, other two args into ECX, EDX.
  unsigned LeaOpc;
  unsigned AddrReg;
  if (Subtarget->is64Bit()) {
    LeaOpc = X86::LEA64r;
    AddrReg = X86::RAX;
  } else {
    LeaOpc = X86::LEA32r;
    AddrReg = X86::EAX;
  }

  MachineInstrBuilder Lea =
    BuildMI(*BB, MI, Loc, InstrInfo->get(LeaOpc), AddrReg);
  for (int OpIdx = 0; OpIdx < X86::AddrNumOperands; ++OpIdx)
    Lea.addOperand(MI->getOperand(OpIdx));

  // The two value operands sit directly after the address operands.
  const unsigned FirstValOp = X86::AddrNumOperands;
  const unsigned ValRegs[2] = { X86::ECX, X86::EDX };
  for (unsigned K = 0; K < 2; ++K)
    BuildMI(*BB, MI, Loc, InstrInfo->get(TargetOpcode::COPY), ValRegs[K])
      .addReg(MI->getOperand(FirstValOp + K).getReg());

  // The instruction doesn't actually take any operands though.
  BuildMI(*BB, MI, Loc, InstrInfo->get(X86::MONITORrrr));

  // The pseudo is gone now.
  MI->eraseFromParent();
  return BB;
}
/// EmitMwait - Expand the MWAIT pseudo-instruction. The pseudo's operand 0 is
/// copied into ECX and operand 1 into EAX, then the operand-less MWAITrr is
/// emitted. All new code is inserted before MI, which is then erased.
/// \returns BB (control flow is not changed).
MachineBasicBlock *
X86TargetLowering::EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const {
  const TargetInstrInfo *InstrInfo = getTargetMachine().getInstrInfo();
  DebugLoc Loc = MI->getDebugLoc();

  // First arg in ECX, the second in EAX.
  const unsigned ArgRegs[2] = { X86::ECX, X86::EAX };
  for (unsigned Idx = 0; Idx < 2; ++Idx)
    BuildMI(*BB, MI, Loc, InstrInfo->get(TargetOpcode::COPY), ArgRegs[Idx])
      .addReg(MI->getOperand(Idx).getReg());

  // The instruction doesn't actually take any operands though.
  BuildMI(*BB, MI, Loc, InstrInfo->get(X86::MWAITrr));

  // The pseudo is gone now.
  MI->eraseFromParent();
  return BB;
}
10509
10510
10511
10512
10513
10514
10515
10516
10517
10518
10519
10520
10521
10522
10523
10524
10525
10526
10527
10528
10529
10530
10531
10532
10533
10534
10535
10536
10537
10538
10539
10540
10541
10542
10543
10544
10545
10546
10547
10548
10549
10550
10551
10552
10553
10554
10555
10556
10557
10558
10559
10560
10561
10562
10563
10564
10565
10566
10567
10568
10569
10570
10571
10572
10573
10574
10575
10576
10577
10578
10579
10580
10581
10582
10583
10584
10585
10586
10587
10588
10589
10590
10591
10592
10593
10594
10595
10596
10597
10598
10599
10600
10601
10602
10603
10604
10605
10606
10607
10608
10609
10610
10611
10612
10613
10614
10615
10616
10617
10618
10619
10620
10621
10622
10623
10624
10625
10626
10627
10628
10629
10630
10631
10632
10633
10634
10635
10636
10637
10638
10639
10640
10641
10642
10643
10644
10645
10646
10647
10648
10649
10650
10651
10652
10653
10654
10655
10656
10657
10658
10659
10660
10661
10662
10663
10664
10665
10666
10667
10668
10669
10670
10671
10672
10673
10674
10675
10676
10677
10678
10679
10680
10681
10682
10683
10684
10685
10686
10687
10688
10689
10690
10691
10692
10693
10694
10695
10696
10697
10698
10699
10700
10701
10702
10703
10704
10705
10706
10707
10708
10709
10710
10711
10712
10713
10714
10715
10716
10717
10718
10719
10720
10721
10722
10723
10724
10725
10726
10727
10728
10729
10730
10731
10732
10733
10734
10735
10736
10737
10738
10739
10740
10741
10742
10743
10744
10745
10746
10747
10748
10749
10750
10751
10752
10753
10754
10755
10756
10757
10758
10759
10760
10761
10762
10763
/// EmitVAARG64WithCustomInserter - Expand the VAARG_64 pseudo-instruction.
/// Emits code that places the address of the next variadic argument in the
/// pseudo's destination register (operand 0). Depending on ArgMode the address
/// is taken from the va_list's reg_save_area (when gp_offset / fp_offset is
/// still below its bound) or from its overflow_area, and the consumed
/// offset / pointer is written back to the va_list.
///
/// \param MI   the VAARG_64 pseudo; it is erased before returning.
/// \param MBB  the block containing MI.
/// \returns the block that holds the code following the pseudo: MBB itself
///          when ArgMode is 0 (no branch is created), otherwise the newly
///          created endMBB.
MachineBasicBlock *
X86TargetLowering::EmitVAARG64WithCustomInserter(
MachineInstr *MI,
MachineBasicBlock *MBB) const {
// Emit va_arg instruction on X86-64.
// Operands to this pseudo-instruction:
// 0 ) Output : destination address (reg)
// 1-5) Input : va_list address (addr, i64mem)
// 6 ) ArgSize : Size (in bytes) of vararg type
// 7 ) ArgMode : 0=overflow only, 1=use gp_offset, 2=use fp_offset
// 8 ) Align : Alignment of type
// 9 ) EFLAGS (implicit-def)
assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands");
unsigned DestReg = MI->getOperand(0).getReg();
// The five address operands of the va_list, in operand order.
MachineOperand &Base = MI->getOperand(1);
MachineOperand &Scale = MI->getOperand(2);
MachineOperand &Index = MI->getOperand(3);
MachineOperand &Disp = MI->getOperand(4);
MachineOperand &Segment = MI->getOperand(5);
unsigned ArgSize = MI->getOperand(6).getImm();
unsigned ArgMode = MI->getOperand(7).getImm();
unsigned Align = MI->getOperand(8).getImm();
// Memory Reference
// Every load/store emitted below reuses the pseudo's memory operand range.
assert(MI->hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
// Machine Information
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
DebugLoc DL = MI->getDebugLoc();
// struct va_list {
// i32 gp_offset
// i32 fp_offset
// i64 overflow_area (address)
// i64 reg_save_area (address)
// }
// sizeof(va_list) = 24
// alignment(va_list) = 8
// The displacements used below follow from this layout: gp_offset at +0,
// fp_offset at +4, overflow_area at +8, reg_save_area at +16.
unsigned TotalNumIntRegs = 6;
unsigned TotalNumXMMRegs = 8;
bool UseGPOffset = (ArgMode == 1);
bool UseFPOffset = (ArgMode == 2);
// Upper bound for the offset being checked: 8 bytes per integer register,
// plus 16 bytes per XMM register when fp_offset is in use.
unsigned MaxOffset = TotalNumIntRegs * 8 +
(UseFPOffset ? TotalNumXMMRegs * 16 : 0);
/* Align ArgSize to a multiple of 8 */
unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
// Only alignments above 8 need explicit realignment of the overflow pointer.
bool NeedsAlign = (Align > 8);
MachineBasicBlock *thisMBB = MBB;
MachineBasicBlock *overflowMBB;
MachineBasicBlock *offsetMBB;
MachineBasicBlock *endMBB;
unsigned OffsetDestReg = 0; // Argument address computed by offsetMBB
unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB
unsigned OffsetReg = 0;
if (!UseGPOffset && !UseFPOffset) {
// If we only pull from the overflow region, we don't create a branch.
// We don't need to alter control flow.
OffsetDestReg = 0; // unused
OverflowDestReg = DestReg;
offsetMBB = NULL;
overflowMBB = thisMBB;
endMBB = thisMBB;
} else {
// First emit code to check if gp_offset (or fp_offset) is below the bound.
// If so, pull the argument from reg_save_area. (branch to offsetMBB)
// If not, pull from overflow_area. (branch to overflowMBB)
//
// thisMBB
// | .
// | .
// offsetMBB overflowMBB
// | .
// | .
// endMBB
// Registers for the PHI in endMBB
OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
MachineFunction *MF = MBB->getParent();
overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MachineFunction::iterator MBBIter = MBB;
++MBBIter;
// Insert the new basic blocks
// Layout order after MBB is offsetMBB, overflowMBB, endMBB.
MF->insert(MBBIter, offsetMBB);
MF->insert(MBBIter, overflowMBB);
MF->insert(MBBIter, endMBB);
// Transfer the remainder of MBB and its successor edges to endMBB.
endMBB->splice(endMBB->begin(), thisMBB,
llvm::next(MachineBasicBlock::iterator(MI)),
thisMBB->end());
endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
// Make offsetMBB and overflowMBB successors of thisMBB
thisMBB->addSuccessor(offsetMBB);
thisMBB->addSuccessor(overflowMBB);
// endMBB is a successor of both offsetMBB and overflowMBB
offsetMBB->addSuccessor(endMBB);
overflowMBB->addSuccessor(endMBB);
// Load the offset value into a register
// (displacement +4 selects fp_offset, +0 selects gp_offset)
OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
.addOperand(Base)
.addOperand(Scale)
.addOperand(Index)
.addDisp(Disp, UseFPOffset ? 4 : 0)
.addOperand(Segment)
.setMemRefs(MMOBegin, MMOEnd);
// Check if there is enough room left to pull this argument.
BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
.addReg(OffsetReg)
.addImm(MaxOffset + 8 - ArgSizeA8);
// Branch to "overflowMBB" if offset >= max
// Fall through to "offsetMBB" otherwise
BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE)))
.addMBB(overflowMBB);
}
// In offsetMBB, emit code to use the reg_save_area.
if (offsetMBB) {
assert(OffsetReg != 0);
// Read the reg_save_area address.
unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
.addOperand(Base)
.addOperand(Scale)
.addOperand(Index)
.addDisp(Disp, 16)
.addOperand(Segment)
.setMemRefs(MMOBegin, MMOEnd);
// Zero-extend the offset
unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
.addImm(0)
.addReg(OffsetReg)
.addImm(X86::sub_32bit);
// Add the offset to the reg_save_area to get the final address.
BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
.addReg(OffsetReg64)
.addReg(RegSaveReg);
// Compute the offset for the next argument
// (advance by 16 when using fp_offset, by 8 when using gp_offset)
unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
.addReg(OffsetReg)
.addImm(UseFPOffset ? 16 : 8);
// Store it back into the va_list.
BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
.addOperand(Base)
.addOperand(Scale)
.addOperand(Index)
.addDisp(Disp, UseFPOffset ? 4 : 0)
.addOperand(Segment)
.addReg(NextOffsetReg)
.setMemRefs(MMOBegin, MMOEnd);
// Jump to endMBB
BuildMI(offsetMBB, DL, TII->get(X86::JMP_4))
.addMBB(endMBB);
}
//
// Emit code to use overflow area
//
// NOTE(review): when no branch was created, overflowMBB is thisMBB and
// nothing after MI has been spliced away; these BuildMI calls take the block
// rather than an insertion point, so confirm they land where intended
// relative to any instructions that follow MI.
// Load the overflow_area address into a register.
unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
.addOperand(Base)
.addOperand(Scale)
.addOperand(Index)
.addDisp(Disp, 8)
.addOperand(Segment)
.setMemRefs(MMOBegin, MMOEnd);
// If we need to align it, do so. Otherwise, just copy the address
// to OverflowDestReg.
if (NeedsAlign) {
// Align the overflow address
assert((Align & (Align-1)) == 0 && "Alignment must be a power of 2");
unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass);
// aligned_addr = (addr + (align-1)) & ~(align-1)
BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
.addReg(OverflowAddrReg)
.addImm(Align-1);
BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg)
.addReg(TmpReg)
.addImm(~(uint64_t)(Align-1));
} else {
BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
.addReg(OverflowAddrReg);
}
// Compute the next overflow address after this argument.
// (the overflow address should be kept 8-byte aligned)
unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg)
.addReg(OverflowDestReg)
.addImm(ArgSizeA8);
// Store the new overflow address.
BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
.addOperand(Base)
.addOperand(Scale)
.addOperand(Index)
.addDisp(Disp, 8)
.addOperand(Segment)
.addReg(NextAddrReg)
.setMemRefs(MMOBegin, MMOEnd);
// If we branched, emit the PHI to the front of endMBB.
// (Without a branch, OverflowDestReg is DestReg itself, so no PHI is needed.)
if (offsetMBB) {
BuildMI(*endMBB, endMBB->begin(), DL,
TII->get(X86::PHI), DestReg)
.addReg(OffsetDestReg).addMBB(offsetMBB)
.addReg(OverflowDestReg).addMBB(overflowMBB);
}
// Erase the pseudo instruction
MI->eraseFromParent();
return endMBB;
}
10764
10765
10766
10767
10768
10769
10770
10771
10772
10773
10774
10775
10776
10777
10778
10779
10780
10781
10782
10783
10784
10785
10786
10787
/// EmitVAStartSaveXMMRegsWithCustomInserter - Expand the pseudo-instruction
/// that spills the XMM argument registers for va_start.
///
/// Pseudo operands, as read below:
///   0   : register holding the count (tested for zero to skip the stores)
///   1   : frame index of the register save area (immediate)
///   2   : byte offset of the first XMM slot within that area (immediate)
///   3.. : the XMM registers to store, 16 bytes apart
///
/// \param MI   the pseudo-instruction; it is erased before returning.
/// \param MBB  the block containing MI.
/// \returns EndMBB, the new block that receives everything that followed MI.
MachineBasicBlock *
X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
                                                 MachineInstr *MI,
                                                 MachineBasicBlock *MBB) const {
  // Emit code to save XMM registers to the stack. The ABI says that the
  // number of registers to save is given in %al, so it's theoretically
  // possible to do an indirect jump trick to avoid saving all of them,
  // however this code takes a simpler approach and just executes all
  // of the stores if %al is non-zero. It's less code, and it's probably
  // easier on the hardware branch predictor, and stores aren't all that
  // expensive anyway.

  // Create the new basic blocks. One block contains all the XMM stores,
  // and one block is the final destination regardless of whether any
  // stores were performed.
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction *F = MBB->getParent();
  MachineFunction::iterator MBBIter = MBB;
  ++MBBIter;
  MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(MBBIter, XMMSaveMBB);
  F->insert(MBBIter, EndMBB);

  // Transfer the remainder of MBB and its successor edges to EndMBB.
  EndMBB->splice(EndMBB->begin(), MBB,
                 llvm::next(MachineBasicBlock::iterator(MI)),
                 MBB->end());
  EndMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // The original block will now fall through to the XMM save block.
  MBB->addSuccessor(XMMSaveMBB);
  // The XMMSaveMBB will fall through to the end block.
  XMMSaveMBB->addSuccessor(EndMBB);

  // Now add the instructions.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();

  unsigned CountReg = MI->getOperand(0).getReg();
  int64_t RegSaveFrameIndex = MI->getOperand(1).getImm();
  int64_t VarArgsFPOffset = MI->getOperand(2).getImm();

  // The zero test is emitted for every target except Win64, where the stores
  // are executed unconditionally.
  if (!Subtarget->isTargetWin64()) {
    // If %al is 0, branch around the XMM save block.
    BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
    BuildMI(MBB, DL, TII->get(X86::JE_4)).addMBB(EndMBB);
    MBB->addSuccessor(EndMBB);
  }

  // In the XMM save block, save all the XMM argument registers.
  for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
    int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
    // Describe the 16-byte, 16-byte-aligned stack store and attach the
    // description to the store instruction itself.
    MachineMemOperand *MMO =
      F->getMachineMemOperand(
          MachinePointerInfo::getFixedStack(RegSaveFrameIndex, Offset),
          MachineMemOperand::MOStore,
          /*Size=*/16, /*Align=*/16);
    BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr))
      .addFrameIndex(RegSaveFrameIndex)
      .addImm(/*Scale=*/1)
      .addReg(/*IndexReg=*/0)
      .addImm(/*Disp=*/Offset)
      .addReg(/*Segment=*/0)
      .addReg(MI->getOperand(i).getReg())
      .addMemOperand(MMO);
  }

  MI->eraseFromParent();   // The pseudo instruction is gone now.

  return EndMBB;
}
/// EmitLoweredSelect - Expand a select pseudo-instruction into a diamond
/// control-flow pattern ending in a PHI.
///
/// Pseudo operands, as read below:
///   0 : destination register (defined by the PHI in sinkMBB)
///   1 : value used when control arrives through copy0MBB
///   2 : value used when control arrives directly from the original block
///   3 : X86 condition code (immediate) selecting the branch opcode
///
/// \param MI  the pseudo-instruction; it is erased before returning.
/// \param BB  the block containing MI.
/// \returns sinkMBB, the new block that receives everything that followed MI.
MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
                                     MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();

  // To "insert" a SELECT_CC instruction, we actually have to insert the
  // diamond control-flow pattern.  The incoming instruction knows the
  // destination vreg to set, the condition code register to branch on, the
  // true/false values to select between, and a branch opcode to use.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = BB;
  ++It;

  //  thisMBB:
  //  ...
  //   TrueVal = ...
  //   cmpTY ccX, r1, r2
  //   bCC sinkMBB
  //   fallthrough --> copy0MBB
  MachineBasicBlock *thisMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, copy0MBB);
  F->insert(It, sinkMBB);

  // If the EFLAGS register isn't dead in the terminator, then claim that it's
  // live into the sink and copy blocks.
  for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
    const MachineOperand &MO = MI->getOperand(I);
    if (!MO.isReg() || !MO.isUse() || MO.isKill()) continue;
    unsigned Reg = MO.getReg();
    if (Reg != X86::EFLAGS) continue;
    copy0MBB->addLiveIn(Reg);
    sinkMBB->addLiveIn(Reg);
  }

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), BB,
                  llvm::next(MachineBasicBlock::iterator(MI)),
                  BB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

  // Add the true and fallthrough blocks as its successors.
  BB->addSuccessor(copy0MBB);
  BB->addSuccessor(sinkMBB);

  // Create the conditional branch instruction.
  unsigned Opc =
    X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
  BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);

  //  copy0MBB:
  //   %FalseValue = ...
  //   # fallthrough to sinkMBB
  copy0MBB->addSuccessor(sinkMBB);

  //  sinkMBB:
  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
  //  ...
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(X86::PHI), MI->getOperand(0).getReg())
    .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
    .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return sinkMBB;
}
/// EmitLoweredWinAlloca - Expand the Windows dynamic-alloca pseudo-instruction
/// into a call to the platform's stack-probe routine, with the implicit
/// register uses/defs of that call spelled out. New code is inserted before
/// MI, which is then erased.
///
/// \param MI  the pseudo-instruction.
/// \param BB  the block containing MI.
/// \returns BB (control flow is not changed).
MachineBasicBlock *
X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
                                        MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();

  assert(!Subtarget->isTargetEnvMacho());

  // The lowering is pretty easy: we're just emitting the call to _alloca.  The
  // non-trivial part is impdef of ESP.

  if (Subtarget->isTargetWin64()) {
    if (Subtarget->isTargetCygMing()) {
      // ___chkstk(Mingw64):
      // Clobbers R10, R11, RAX and EFLAGS.
      // Updates RSP.
      BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
        .addExternalSymbol("___chkstk")
        .addReg(X86::RAX, RegState::Implicit)
        .addReg(X86::RSP, RegState::Implicit)
        .addReg(X86::RAX, RegState::Define | RegState::Implicit)
        .addReg(X86::RSP, RegState::Define | RegState::Implicit)
        .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
    } else {
      // __chkstk(MSVCRT): does not update stack pointer.
      // Clobbers R10, R11 and EFLAGS.
      // FIXME: RAX(allocated size) might be reused and not killed.
      BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
        .addExternalSymbol("__chkstk")
        .addReg(X86::RAX, RegState::Implicit)
        .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
      // RAX has the offset to be subtracted from RSP.
      BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP)
        .addReg(X86::RSP)
        .addReg(X86::RAX);
    }
  } else {
    // 32-bit: the probe symbol depends on the runtime environment.
    const char *StackProbeSymbol =
      Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";

    BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
      .addExternalSymbol(StackProbeSymbol)
      .addReg(X86::EAX, RegState::Implicit)
      .addReg(X86::ESP, RegState::Implicit)
      .addReg(X86::EAX, RegState::Define | RegState::Implicit)
      .addReg(X86::ESP, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
  }

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}
MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
MachineBasicBlock *BB) const {
// This is pretty easy. We're taking the value that we received from
// our load from the relocation, sticking it in either RDI (x86-64)
// or EAX and doing an indirect call. The return value will then
// be in the normal return register.
= static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo());
DebugLoc DL = MI->getDebugLoc();
MachineFunction *F = BB->getParent();
assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?");
assert(MI->getOperand(3).isGlobal() && "This should be a global");
if (Subtarget->is64Bit()) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV64rm), X86::RDI)
.addReg(X86::RIP)
.addImm(0).addReg(0)
.addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
addDirectMem(MIB, X86::RDI);
} else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
TII->get(X86::MOV32rm), X86::EAX)
.addReg(0)
.addImm(0).addReg(0)
.addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,