From 6a089ce0e40abbe4e0f26f05540e3caa60d98a29 Mon Sep 17 00:00:00 2001
From: Sebastian Neubauer
Date: Wed, 30 Sep 2020 14:15:24 +0200
Subject: [PATCH] [AMDGPU] Use tablegen for argument indices

Use tablegen generic tables to get the indices of image intrinsic
arguments. Before, the computation of which image intrinsic argument is
at which index was scattered across a few places: tablegen, the SDag
instruction selection, and GlobalISel. This patch changes that, so only
tablegen contains code to compute indices and the ImageDimIntrinsicInfo
table provides this information.

Differential Revision: https://reviews.llvm.org/D86270
---
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td      |  14 ++-
 .../lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h |  31 -----
 llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h      |  27 ++++-
 .../AMDGPU/AMDGPUInstructionSelector.cpp      |  51 ++++----
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 111 +++++++++---------
 llvm/lib/Target/AMDGPU/MIMGInstructions.td    |  21 +++-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  83 ++++++-------
 llvm/lib/Target/AMDGPU/SIISelLowering.h       |   2 +-
 8 files changed, 162 insertions(+), 178 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 918ab3efc0ad..ed96c0896d74 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -688,11 +688,15 @@ class AMDGPUImageDimIntrinsicEval {
   int NumRSrcArgs = 1;
   int NumSampArgs = !if(P_.IsSample, 2, 0);
   int DmaskArgIndex = NumDataArgs;
-  int VAddrArgIndex = !add(NumDataArgs, NumDmaskArgs);
-  int GradientArgIndex = !add(NumDataArgs, NumDmaskArgs, NumExtraAddrArgs);
-  int CoordArgIndex = !add(NumDataArgs, NumDmaskArgs, NumExtraAddrArgs, NumGradientArgs);
-  int UnormArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, 1);
-  int TexFailCtrlArgIndex = !add(NumDataArgs, NumDmaskArgs, NumVAddrArgs, NumRSrcArgs, NumSampArgs);
+  int VAddrArgIndex = !add(DmaskArgIndex, NumDmaskArgs);
+  int GradientArgIndex = !add(VAddrArgIndex, NumExtraAddrArgs);
+  int CoordArgIndex = !add(GradientArgIndex, NumGradientArgs);
+  int LodArgIndex = !add(VAddrArgIndex, NumVAddrArgs, -1);
+  int MipArgIndex = LodArgIndex;
+  int RsrcArgIndex = !add(VAddrArgIndex, NumVAddrArgs);
+  int SampArgIndex = !add(RsrcArgIndex, NumRSrcArgs);
+  int UnormArgIndex = !add(SampArgIndex, 1);
+  int TexFailCtrlArgIndex = !add(SampArgIndex, NumSampArgs);
   int CachePolicyArgIndex = !add(TexFailCtrlArgIndex, 1);
 }

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
index 766750758efc..87bb88cec502 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
@@ -9,7 +9,6 @@
 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUGLOBALISELUTILS_H
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUGLOBALISELUTILS_H

-#include "AMDGPUInstrInfo.h"
 #include "llvm/CodeGen/Register.h"
 #include

@@ -26,36 +25,6 @@ getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg);

 bool isLegalVOP3PShuffleMask(ArrayRef Mask);

-/// Return number of address arguments, and the number of gradients for an image
-/// intrinsic.
-inline std::pair
-getImageNumVAddr(const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
-                 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode) {
-  const AMDGPU::MIMGDimInfo *DimInfo
-      = AMDGPU::getMIMGDimInfo(ImageDimIntr->Dim);
-
-  int NumGradients = BaseOpcode->Gradients ? DimInfo->NumGradients : 0;
-  int NumCoords = BaseOpcode->Coordinates ?
DimInfo->NumCoords : 0; - int NumLCM = BaseOpcode->LodOrClampOrMip ? 1 : 0; - int NumVAddr = BaseOpcode->NumExtraArgs + NumGradients + NumCoords + NumLCM; - return {NumVAddr, NumGradients}; -} - -/// Return index of dmask in an gMIR image intrinsic -inline int getDMaskIdx(const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode, - int NumDefs) { - assert(!BaseOpcode->Atomic); - return NumDefs + 1 + (BaseOpcode->Store ? 1 : 0); -} - -/// Return first address operand index in a gMIR image intrinsic. -inline int getImageVAddrIdxBegin(const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode, - int NumDefs) { - if (BaseOpcode->Atomic) - return NumDefs + 1 + (BaseOpcode->AtomicX2 ? 2 : 1); - return getDMaskIdx(BaseOpcode, NumDefs) + 1; -} - } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h index a94737b1d3d5..304dcb5d47f1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -52,11 +52,28 @@ struct ImageDimIntrinsicInfo { unsigned Intr; unsigned BaseOpcode; MIMGDim Dim; - unsigned GradientStart; - unsigned CoordStart; - unsigned VAddrEnd; - unsigned GradientTyArg; - unsigned CoordTyArg; + + uint8_t NumGradients; + uint8_t NumDmask; + uint8_t NumData; + uint8_t NumVAddrs; + uint8_t NumArgs; + + uint8_t DMaskIndex; + uint8_t VAddrStart; + uint8_t GradientStart; + uint8_t CoordStart; + uint8_t LodIndex; + uint8_t MipIndex; + uint8_t VAddrEnd; + uint8_t RsrcIndex; + uint8_t SampIndex; + uint8_t UnormIndex; + uint8_t TexFailCtrlIndex; + uint8_t CachePolicyIndex; + + uint8_t GradientTyArg; + uint8_t CoordTyArg; }; const ImageDimIntrinsicInfo *getImageDimIntrinsicInfo(unsigned Intr); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 58d30dc9ddf6..341e28b760af 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1485,34 +1485,27 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( unsigned IntrOpcode = Intr->BaseOpcode; const bool IsGFX10 = STI.getGeneration() >= AMDGPUSubtarget::GFX10; - const int VAddrIdx = getImageVAddrIdxBegin(BaseOpcode, - MI.getNumExplicitDefs()); - int NumVAddr, NumGradients; - std::tie(NumVAddr, NumGradients) = getImageNumVAddr(Intr, BaseOpcode); + const unsigned ArgOffset = MI.getNumExplicitDefs() + 1; Register VDataIn, VDataOut; LLT VDataTy; int NumVDataDwords = -1; bool IsD16 = false; - // XXX - Can we just get the second to last argument for ctrl? - unsigned CtrlIdx; // Index of texfailctrl argument bool Unorm; - if (!BaseOpcode->Sampler) { + if (!BaseOpcode->Sampler) Unorm = true; - CtrlIdx = VAddrIdx + NumVAddr + 1; - } else { - Unorm = MI.getOperand(VAddrIdx + NumVAddr + 2).getImm() != 0; - CtrlIdx = VAddrIdx + NumVAddr + 3; - } + else + Unorm = MI.getOperand(ArgOffset + Intr->UnormIndex).getImm() != 0; bool TFE; bool LWE; bool IsTexFail = false; - if (!parseTexFail(MI.getOperand(CtrlIdx).getImm(), TFE, LWE, IsTexFail)) + if (!parseTexFail(MI.getOperand(ArgOffset + Intr->TexFailCtrlIndex).getImm(), + TFE, LWE, IsTexFail)) return false; - const int Flags = MI.getOperand(CtrlIdx + 2).getImm(); + const int Flags = MI.getOperand(ArgOffset + Intr->NumArgs).getImm(); const bool IsA16 = (Flags & 1) != 0; const bool IsG16 = (Flags & 2) != 0; @@ -1543,9 +1536,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( NumVDataDwords = Is64Bit ? 2 : 1; } } else { - const int DMaskIdx = 2; // Input/output + intrinsic ID. 
- - DMask = MI.getOperand(DMaskIdx).getImm(); + DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm(); DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask); if (BaseOpcode->Store) { @@ -1576,7 +1567,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( if (LZMappingInfo) { // The legalizer replaced the register with an immediate 0 if we need to // change the opcode. - const MachineOperand &Lod = MI.getOperand(VAddrIdx + NumVAddr - 1); + const MachineOperand &Lod = MI.getOperand(ArgOffset + Intr->LodIndex); if (Lod.isImm()) { assert(Lod.getImm() == 0); IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l @@ -1585,7 +1576,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( // Optimize _mip away, when 'lod' is zero if (MIPMappingInfo) { - const MachineOperand &Lod = MI.getOperand(VAddrIdx + NumVAddr - 1); + const MachineOperand &Lod = MI.getOperand(ArgOffset + Intr->MipIndex); if (Lod.isImm()) { assert(Lod.getImm() == 0); IntrOpcode = MIPMappingInfo->NONMIP; // set new opcode to variant without _mip @@ -1608,20 +1599,22 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( bool DLC = false; if (BaseOpcode->Atomic) { GLC = true; // TODO no-return optimization - if (!parseCachePolicy(MI.getOperand(CtrlIdx + 1).getImm(), nullptr, &SLC, - IsGFX10 ? &DLC : nullptr)) + if (!parseCachePolicy( + MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm(), nullptr, + &SLC, IsGFX10 ? &DLC : nullptr)) return false; } else { - if (!parseCachePolicy(MI.getOperand(CtrlIdx + 1).getImm(), &GLC, &SLC, - IsGFX10 ? &DLC : nullptr)) + if (!parseCachePolicy( + MI.getOperand(ArgOffset + Intr->CachePolicyIndex).getImm(), &GLC, + &SLC, IsGFX10 ? &DLC : nullptr)) return false; } int NumVAddrRegs = 0; int NumVAddrDwords = 0; - for (int I = 0; I < NumVAddr; ++I) { + for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) { // Skip the $noregs and 0s inserted during legalization. - MachineOperand &AddrOp = MI.getOperand(VAddrIdx + I); + MachineOperand &AddrOp = MI.getOperand(ArgOffset + I); if (!AddrOp.isReg()) continue; // XXX - Break? @@ -1684,17 +1677,17 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic( if (VDataIn) MIB.addReg(VDataIn); // vdata input - for (int i = 0; i != NumVAddrRegs; ++i) { - MachineOperand &SrcOp = MI.getOperand(VAddrIdx + i); + for (int I = 0; I != NumVAddrRegs; ++I) { + MachineOperand &SrcOp = MI.getOperand(ArgOffset + Intr->VAddrStart + I); if (SrcOp.isReg()) { assert(SrcOp.getReg() != 0); MIB.addReg(SrcOp.getReg()); } } - MIB.addReg(MI.getOperand(VAddrIdx + NumVAddr).getReg()); // rsrc + MIB.addReg(MI.getOperand(ArgOffset + Intr->RsrcIndex).getReg()); if (BaseOpcode->Sampler) - MIB.addReg(MI.getOperand(VAddrIdx + NumVAddr + 1).getReg()); // sampler + MIB.addReg(MI.getOperand(ArgOffset + Intr->SampIndex).getReg()); MIB.addImm(DMask); // dmask diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 0f1eb03f0c27..fa3130ab3fe0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -3920,38 +3920,39 @@ bool AMDGPULegalizerInfo::legalizeBufferAtomic(MachineInstr &MI, /// Turn a set of s16 typed registers in \p A16AddrRegs into a dword sized /// vector with s16 typed elements. 
-static void packImageA16AddressToDwords(MachineIRBuilder &B, MachineInstr &MI, - SmallVectorImpl &PackedAddrs, - int AddrIdx, int DimIdx, int EndIdx, - int NumGradients) { +static void packImageA16AddressToDwords( + MachineIRBuilder &B, MachineInstr &MI, + SmallVectorImpl &PackedAddrs, unsigned ArgOffset, + const AMDGPU::ImageDimIntrinsicInfo *Intr, unsigned EndIdx) { const LLT S16 = LLT::scalar(16); const LLT V2S16 = LLT::vector(2, 16); - for (int I = AddrIdx; I < EndIdx; ++I) { - MachineOperand &SrcOp = MI.getOperand(I); + for (unsigned I = Intr->VAddrStart; I < EndIdx; I++) { + MachineOperand &SrcOp = MI.getOperand(ArgOffset + I); if (!SrcOp.isReg()) continue; // _L to _LZ may have eliminated this. Register AddrReg = SrcOp.getReg(); - if (I < DimIdx) { + if (I < Intr->GradientStart) { AddrReg = B.buildBitcast(V2S16, AddrReg).getReg(0); PackedAddrs.push_back(AddrReg); } else { // Dz/dh, dz/dv and the last odd coord are packed with undef. Also, in 1D, // derivatives dx/dh and dx/dv are packed with undef. if (((I + 1) >= EndIdx) || - ((NumGradients / 2) % 2 == 1 && - (I == DimIdx + (NumGradients / 2) - 1 || - I == DimIdx + NumGradients - 1)) || + ((Intr->NumGradients / 2) % 2 == 1 && + (I == Intr->GradientStart + (Intr->NumGradients / 2) - 1 || + I == Intr->GradientStart + Intr->NumGradients - 1)) || // Check for _L to _LZ optimization - !MI.getOperand(I + 1).isReg()) { + !MI.getOperand(ArgOffset + I + 1).isReg()) { PackedAddrs.push_back( B.buildBuildVector(V2S16, {AddrReg, B.buildUndef(S16).getReg(0)}) .getReg(0)); } else { PackedAddrs.push_back( - B.buildBuildVector(V2S16, {AddrReg, MI.getOperand(I + 1).getReg()}) + B.buildBuildVector( + V2S16, {AddrReg, MI.getOperand(ArgOffset + I + 1).getReg()}) .getReg(0)); ++I; } @@ -4010,43 +4011,37 @@ static void convertImageAddrToPacked(MachineIRBuilder &B, MachineInstr &MI, /// the intrinsic's arguments. In cases like a16 addreses, this requires padding /// now unnecessary arguments with $noreg. bool AMDGPULegalizerInfo::legalizeImageIntrinsic( - MachineInstr &MI, MachineIRBuilder &B, - GISelChangeObserver &Observer, - const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr) const { + MachineInstr &MI, MachineIRBuilder &B, GISelChangeObserver &Observer, + const AMDGPU::ImageDimIntrinsicInfo *Intr) const { - const int NumDefs = MI.getNumExplicitDefs(); + const unsigned NumDefs = MI.getNumExplicitDefs(); + const unsigned ArgOffset = NumDefs + 1; bool IsTFE = NumDefs == 2; // We are only processing the operands of d16 image operations on subtargets // that use the unpacked register layout, or need to repack the TFE result. // TODO: Do we need to guard against already legalized intrinsics? const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = - AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode); + AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode); MachineRegisterInfo *MRI = B.getMRI(); const LLT S32 = LLT::scalar(32); const LLT S16 = LLT::scalar(16); const LLT V2S16 = LLT::vector(2, 16); - // Index of first address argument - const int AddrIdx = getImageVAddrIdxBegin(BaseOpcode, NumDefs); - - int NumVAddrs, NumGradients; - std::tie(NumVAddrs, NumGradients) = getImageNumVAddr(ImageDimIntr, BaseOpcode); - const int DMaskIdx = BaseOpcode->Atomic ? -1 : - getDMaskIdx(BaseOpcode, NumDefs); unsigned DMask = 0; // Check for 16 bit addresses and pack if true. 
- int DimIdx = AddrIdx + BaseOpcode->NumExtraArgs; - LLT GradTy = MRI->getType(MI.getOperand(DimIdx).getReg()); - LLT AddrTy = MRI->getType(MI.getOperand(DimIdx + NumGradients).getReg()); + LLT GradTy = + MRI->getType(MI.getOperand(ArgOffset + Intr->GradientStart).getReg()); + LLT AddrTy = + MRI->getType(MI.getOperand(ArgOffset + Intr->CoordStart).getReg()); const bool IsG16 = GradTy == S16; const bool IsA16 = AddrTy == S16; int DMaskLanes = 0; if (!BaseOpcode->Atomic) { - DMask = MI.getOperand(DMaskIdx).getImm(); + DMask = MI.getOperand(ArgOffset + Intr->DMaskIndex).getImm(); if (BaseOpcode->Gather4) { DMaskLanes = 4; } else if (DMask != 0) { @@ -4073,7 +4068,7 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( if (IsTFE && DMask == 0) { DMask = 0x1; DMaskLanes = 1; - MI.getOperand(DMaskIdx).setImm(DMask); + MI.getOperand(ArgOffset + Intr->DMaskIndex).setImm(DMask); } if (BaseOpcode->Atomic) { @@ -4094,41 +4089,41 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( } } - int CorrectedNumVAddrs = NumVAddrs; + unsigned CorrectedNumVAddrs = Intr->NumVAddrs; // Optimize _L to _LZ when _L is zero if (const AMDGPU::MIMGLZMappingInfo *LZMappingInfo = - AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) { + AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode)) { const ConstantFP *ConstantLod; - const int LodIdx = AddrIdx + NumVAddrs - 1; - if (mi_match(MI.getOperand(LodIdx).getReg(), *MRI, m_GFCst(ConstantLod))) { + if (mi_match(MI.getOperand(ArgOffset + Intr->LodIndex).getReg(), *MRI, + m_GFCst(ConstantLod))) { if (ConstantLod->isZero() || ConstantLod->isNegative()) { // Set new opcode to _lz variant of _l, and change the intrinsic ID. - ImageDimIntr = AMDGPU::getImageDimInstrinsicByBaseOpcode( - LZMappingInfo->LZ, ImageDimIntr->Dim); + const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr = + AMDGPU::getImageDimInstrinsicByBaseOpcode(LZMappingInfo->LZ, + Intr->Dim); // The starting indexes should remain in the same place. - --NumVAddrs; --CorrectedNumVAddrs; - MI.getOperand(MI.getNumExplicitDefs()).setIntrinsicID( - static_cast(ImageDimIntr->Intr)); - MI.RemoveOperand(LodIdx); + MI.getOperand(MI.getNumExplicitDefs()) + .setIntrinsicID(static_cast(NewImageDimIntr->Intr)); + MI.RemoveOperand(ArgOffset + Intr->LodIndex); + Intr = NewImageDimIntr; } } } // Optimize _mip away, when 'lod' is zero - if (AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) { + if (AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode)) { int64_t ConstantLod; - const int LodIdx = AddrIdx + NumVAddrs - 1; - - if (mi_match(MI.getOperand(LodIdx).getReg(), *MRI, m_ICst(ConstantLod))) { + if (mi_match(MI.getOperand(ArgOffset + Intr->MipIndex).getReg(), *MRI, + m_ICst(ConstantLod))) { if (ConstantLod == 0) { // TODO: Change intrinsic opcode and remove operand instead or replacing // it with 0, as the _L to _LZ handling is done above. - MI.getOperand(LodIdx).ChangeToImmediate(0); + MI.getOperand(ArgOffset + Intr->MipIndex).ChangeToImmediate(0); --CorrectedNumVAddrs; } } @@ -4143,18 +4138,17 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( } else if (!ST.hasG16()) return false; - if (NumVAddrs > 1) { + if (Intr->NumVAddrs > 1) { SmallVector PackedRegs; // Don't compress addresses for G16 - const int PackEndIdx = - IsA16 ? (AddrIdx + NumVAddrs) : (DimIdx + NumGradients); - packImageA16AddressToDwords(B, MI, PackedRegs, AddrIdx, DimIdx, - PackEndIdx, NumGradients); + const int PackEndIdx = IsA16 ? 
Intr->VAddrEnd : Intr->CoordStart; + packImageA16AddressToDwords(B, MI, PackedRegs, ArgOffset, Intr, + PackEndIdx); if (!IsA16) { // Add uncompressed address - for (int I = DimIdx + NumGradients; I != AddrIdx + NumVAddrs; ++I) { - int AddrReg = MI.getOperand(I).getReg(); + for (unsigned I = Intr->CoordStart; I < Intr->VAddrEnd; I++) { + int AddrReg = MI.getOperand(ArgOffset + I).getReg(); assert(B.getMRI()->getType(AddrReg) == LLT::scalar(32)); PackedRegs.push_back(AddrReg); } @@ -4170,9 +4164,9 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( PackedRegs.resize(1); } - const int NumPacked = PackedRegs.size(); - for (int I = 0; I != NumVAddrs; ++I) { - MachineOperand &SrcOp = MI.getOperand(AddrIdx + I); + const unsigned NumPacked = PackedRegs.size(); + for (unsigned I = Intr->VAddrStart; I < Intr->VAddrEnd; I++) { + MachineOperand &SrcOp = MI.getOperand(ArgOffset + I); if (!SrcOp.isReg()) { assert(SrcOp.isImm() && SrcOp.getImm() == 0); continue; @@ -4180,8 +4174,8 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( assert(SrcOp.getReg() != AMDGPU::NoRegister); - if (I < NumPacked) - SrcOp.setReg(PackedRegs[I]); + if (I - Intr->VAddrStart < NumPacked) + SrcOp.setReg(PackedRegs[I - Intr->VAddrStart]); else SrcOp.setReg(AMDGPU::NoRegister); } @@ -4200,8 +4194,9 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic( // allocation when possible. const bool UseNSA = CorrectedNumVAddrs >= 3 && ST.hasNSAEncoding(); - if (!UseNSA && NumVAddrs > 1) - convertImageAddrToPacked(B, MI, AddrIdx, NumVAddrs); + if (!UseNSA && Intr->NumVAddrs > 1) + convertImageAddrToPacked(B, MI, ArgOffset + Intr->VAddrStart, + Intr->NumVAddrs); } int Flags = 0; diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index c223e1a8bc26..f56b8728e64c 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -896,9 +896,25 @@ class ImageDimIntrinsicInfo { AMDGPUDimProps Dim = I.P.Dim; AMDGPUImageDimIntrinsicEval DimEval = AMDGPUImageDimIntrinsicEval; + bits<8> NumGradients = DimEval.NumGradientArgs; + bits<8> NumDmask = DimEval.NumDmaskArgs; + bits<8> NumData = DimEval.NumDataArgs; + bits<8> NumVAddrs = DimEval.NumVAddrArgs; + bits<8> NumArgs = !add(DimEval.CachePolicyArgIndex, 1); + + bits<8> DMaskIndex = DimEval.DmaskArgIndex; + bits<8> VAddrStart = DimEval.VAddrArgIndex; bits<8> GradientStart = DimEval.GradientArgIndex; bits<8> CoordStart = DimEval.CoordArgIndex; + bits<8> LodIndex = DimEval.LodArgIndex; + bits<8> MipIndex = DimEval.MipArgIndex; bits<8> VAddrEnd = !add(DimEval.VAddrArgIndex, DimEval.NumVAddrArgs); + bits<8> RsrcIndex = DimEval.RsrcArgIndex; + bits<8> SampIndex = DimEval.SampArgIndex; + bits<8> UnormIndex = DimEval.UnormArgIndex; + bits<8> TexFailCtrlIndex = DimEval.TexFailCtrlArgIndex; + bits<8> CachePolicyIndex = DimEval.CachePolicyArgIndex; + bits<8> GradientTyArg = !add(I.P.NumRetAndDataAnyTypes, !foldl(0, I.P.ExtraAddrArgs, cnt, arg, !add(cnt, arg.Type.isAny))); bits<8> CoordTyArg = !add(GradientTyArg, !if(I.P.Gradients, 1, 0)); @@ -906,7 +922,10 @@ class ImageDimIntrinsicInfo { def ImageDimIntrinsicTable : GenericTable { let FilterClass = "ImageDimIntrinsicInfo"; - let Fields = ["Intr", "BaseOpcode", "Dim", "GradientStart", "CoordStart", "VAddrEnd", "GradientTyArg", "CoordTyArg"]; + let Fields = ["Intr", "BaseOpcode", "Dim", "NumGradients", "NumDmask", "NumData", "NumVAddrs", "NumArgs", + "DMaskIndex", "VAddrStart", "GradientStart", "CoordStart", "LodIndex", "MipIndex", "VAddrEnd", + "RsrcIndex", 
"SampIndex", "UnormIndex", "TexFailCtrlIndex", "CachePolicyIndex", + "GradientTyArg", "CoordTyArg"]; GenericEnum TypeOf_BaseOpcode = MIMGBaseOpcode; GenericEnum TypeOf_Dim = MIMGDim; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 73408346fbae..1725c56e0db3 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -5945,7 +5945,7 @@ static void packImageA16AddressToDwords(SelectionDAG &DAG, SDValue Op, SDValue SITargetLowering::lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr, - SelectionDAG &DAG) const { + SelectionDAG &DAG, bool WithChain) const { SDLoc DL(Op); MachineFunction &MF = DAG.getMachineFunction(); const GCNSubtarget* ST = &MF.getSubtarget(); @@ -5968,7 +5968,9 @@ SDValue SITargetLowering::lowerImage(SDValue Op, int NumVDataDwords; bool AdjustRetType = false; - unsigned AddrIdx; // Index of first address argument + // Offset of intrinsic arguments + const unsigned ArgOffset = WithChain ? 2 : 1; + unsigned DMask; unsigned DMaskLanes = 0; @@ -5986,15 +5988,13 @@ SDValue SITargetLowering::lowerImage(SDValue Op, ResultTypes[0] = Is64Bit ? MVT::v2i64 : MVT::v2i32; DMask = Is64Bit ? 0xf : 0x3; NumVDataDwords = Is64Bit ? 4 : 2; - AddrIdx = 4; } else { DMask = Is64Bit ? 0x3 : 0x1; NumVDataDwords = Is64Bit ? 2 : 1; - AddrIdx = 3; } } else { - unsigned DMaskIdx = BaseOpcode->Store ? 3 : isa(Op) ? 2 : 1; - auto DMaskConst = cast(Op.getOperand(DMaskIdx)); + auto *DMaskConst = + cast(Op.getOperand(ArgOffset + Intr->DMaskIndex)); DMask = DMaskConst->getZExtValue(); DMaskLanes = BaseOpcode->Gather4 ? 4 : countPopulation(DMask); @@ -6034,56 +6034,45 @@ SDValue SITargetLowering::lowerImage(SDValue Op, AdjustRetType = true; } - - AddrIdx = DMaskIdx + 1; } - unsigned NumGradients = BaseOpcode->Gradients ? DimInfo->NumGradients : 0; - unsigned NumCoords = BaseOpcode->Coordinates ? DimInfo->NumCoords : 0; - unsigned NumLCM = BaseOpcode->LodOrClampOrMip ? 1 : 0; - unsigned NumVAddrs = BaseOpcode->NumExtraArgs + NumGradients + - NumCoords + NumLCM; - unsigned NumMIVAddrs = NumVAddrs; - + unsigned VAddrEnd = ArgOffset + Intr->VAddrEnd; SmallVector VAddrs; // Optimize _L to _LZ when _L is zero if (LZMappingInfo) { - if (auto ConstantLod = - dyn_cast(Op.getOperand(AddrIdx+NumVAddrs-1))) { + if (auto *ConstantLod = dyn_cast( + Op.getOperand(ArgOffset + Intr->LodIndex))) { if (ConstantLod->isZero() || ConstantLod->isNegative()) { IntrOpcode = LZMappingInfo->LZ; // set new opcode to _lz variant of _l - NumMIVAddrs--; // remove 'lod' + VAddrEnd--; // remove 'lod' } } } // Optimize _mip away, when 'lod' is zero if (MIPMappingInfo) { - if (auto ConstantLod = - dyn_cast(Op.getOperand(AddrIdx+NumVAddrs-1))) { + if (auto *ConstantLod = dyn_cast( + Op.getOperand(ArgOffset + Intr->MipIndex))) { if (ConstantLod->isNullValue()) { IntrOpcode = MIPMappingInfo->NONMIP; // set new opcode to variant without _mip - NumMIVAddrs--; // remove 'lod' + VAddrEnd--; // remove 'mip' } } } // Push back extra arguments. - for (unsigned I = 0; I < BaseOpcode->NumExtraArgs; I++) - VAddrs.push_back(Op.getOperand(AddrIdx + I)); + for (unsigned I = Intr->VAddrStart; I < Intr->GradientStart; I++) + VAddrs.push_back(Op.getOperand(ArgOffset + I)); // Check for 16 bit addresses or derivatives and pack if true. 
- unsigned DimIdx = AddrIdx + BaseOpcode->NumExtraArgs; - unsigned CoordIdx = DimIdx + NumGradients; - unsigned CoordsEnd = AddrIdx + NumMIVAddrs; - - MVT VAddrVT = Op.getOperand(DimIdx).getSimpleValueType(); + MVT VAddrVT = + Op.getOperand(ArgOffset + Intr->GradientStart).getSimpleValueType(); MVT VAddrScalarVT = VAddrVT.getScalarType(); MVT PackVectorVT = VAddrScalarVT == MVT::f16 ? MVT::v2f16 : MVT::v2i16; IsG16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16; - VAddrVT = Op.getOperand(CoordIdx).getSimpleValueType(); + VAddrVT = Op.getOperand(ArgOffset + Intr->CoordStart).getSimpleValueType(); VAddrScalarVT = VAddrVT.getScalarType(); IsA16 = VAddrScalarVT == MVT::f16 || VAddrScalarVT == MVT::i16; if (IsA16 || IsG16) { @@ -6118,17 +6107,18 @@ SDValue SITargetLowering::lowerImage(SDValue Op, } // Don't compress addresses for G16 - const int PackEndIdx = IsA16 ? CoordsEnd : CoordIdx; - packImageA16AddressToDwords(DAG, Op, PackVectorVT, VAddrs, DimIdx, - PackEndIdx, NumGradients); + const int PackEndIdx = IsA16 ? VAddrEnd : (ArgOffset + Intr->CoordStart); + packImageA16AddressToDwords(DAG, Op, PackVectorVT, VAddrs, + ArgOffset + Intr->GradientStart, PackEndIdx, + Intr->NumGradients); if (!IsA16) { // Add uncompressed address - for (unsigned I = CoordIdx; I < CoordsEnd; I++) + for (unsigned I = ArgOffset + Intr->CoordStart; I < VAddrEnd; I++) VAddrs.push_back(Op.getOperand(I)); } } else { - for (unsigned I = DimIdx; I < CoordsEnd; I++) + for (unsigned I = ArgOffset + Intr->GradientStart; I < VAddrEnd; I++) VAddrs.push_back(Op.getOperand(I)); } @@ -6151,22 +6141,19 @@ SDValue SITargetLowering::lowerImage(SDValue Op, SDValue True = DAG.getTargetConstant(1, DL, MVT::i1); SDValue False = DAG.getTargetConstant(0, DL, MVT::i1); - unsigned CtrlIdx; // Index of texfailctrl argument SDValue Unorm; if (!BaseOpcode->Sampler) { Unorm = True; - CtrlIdx = AddrIdx + NumVAddrs + 1; } else { auto UnormConst = - cast(Op.getOperand(AddrIdx + NumVAddrs + 2)); + cast(Op.getOperand(ArgOffset + Intr->UnormIndex)); Unorm = UnormConst->getZExtValue() ? True : False; - CtrlIdx = AddrIdx + NumVAddrs + 3; } SDValue TFE; SDValue LWE; - SDValue TexFail = Op.getOperand(CtrlIdx); + SDValue TexFail = Op.getOperand(ArgOffset + Intr->TexFailCtrlIndex); bool IsTexFail = false; if (!parseTexFail(TexFail, DAG, &TFE, &LWE, IsTexFail)) return Op; @@ -6213,12 +6200,12 @@ SDValue SITargetLowering::lowerImage(SDValue Op, SDValue DLC; if (BaseOpcode->Atomic) { GLC = True; // TODO no-return optimization - if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, nullptr, &SLC, - IsGFX10 ? &DLC : nullptr)) + if (!parseCachePolicy(Op.getOperand(ArgOffset + Intr->CachePolicyIndex), + DAG, nullptr, &SLC, IsGFX10 ? &DLC : nullptr)) return Op; } else { - if (!parseCachePolicy(Op.getOperand(CtrlIdx + 1), DAG, &GLC, &SLC, - IsGFX10 ? &DLC : nullptr)) + if (!parseCachePolicy(Op.getOperand(ArgOffset + Intr->CachePolicyIndex), + DAG, &GLC, &SLC, IsGFX10 ? 
&DLC : nullptr)) return Op; } @@ -6231,9 +6218,9 @@ SDValue SITargetLowering::lowerImage(SDValue Op, } else { Ops.push_back(VAddr); } - Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs)); // rsrc + Ops.push_back(Op.getOperand(ArgOffset + Intr->RsrcIndex)); if (BaseOpcode->Sampler) - Ops.push_back(Op.getOperand(AddrIdx + NumVAddrs + 1)); // sampler + Ops.push_back(Op.getOperand(ArgOffset + Intr->SampIndex)); Ops.push_back(DAG.getTargetConstant(DMask, DL, MVT::i32)); if (IsGFX10) Ops.push_back(DAG.getTargetConstant(DimInfo->Encoding, DL, MVT::i32)); @@ -6714,7 +6701,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, default: if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrinsicID)) - return lowerImage(Op, ImageDimIntr, DAG); + return lowerImage(Op, ImageDimIntr, DAG, false); return Op; } @@ -7376,7 +7363,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, default: if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrID)) - return lowerImage(Op, ImageDimIntr, DAG); + return lowerImage(Op, ImageDimIntr, DAG, true); return SDValue(); } @@ -7716,7 +7703,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, default: { if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr = AMDGPU::getImageDimIntrinsicInfo(IntrinsicID)) - return lowerImage(Op, ImageDimIntr, DAG); + return lowerImage(Op, ImageDimIntr, DAG, true); return Op; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 6bfa33cef7ce..9aa307f7bc59 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -59,7 +59,7 @@ private: SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op, MVT VT, unsigned Offset) const; SDValue lowerImage(SDValue Op, const AMDGPU::ImageDimIntrinsicInfo *Intr, - SelectionDAG &DAG) const; + SelectionDAG &DAG, bool WithChain) const; SDValue lowerSBuffer(EVT VT, SDLoc DL, SDValue Rsrc, SDValue Offset, SDValue CachePolicy, SelectionDAG &DAG) const; -- GitLab
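
How the generated table is consumed after this change: every user calls AMDGPU::getImageDimIntrinsicInfo(IntrID) and then addresses operands as ArgOffset + Intr->SomeIndex, where ArgOffset skips the explicit defs plus the intrinsic ID operand (see selectImageIntrinsic and legalizeImageIntrinsic in the patch). The standalone sketch below mirrors that layout with a simplified copy of the struct added to AMDGPUInstrInfo.h; it is not part of the patch, and the concrete numbers are hand-derived from the AMDGPUImageDimIntrinsicEval formulas for a 2D sample intrinsic with explicit derivatives (llvm.amdgcn.image.sample.d.2d), not values read from the real generated table.

// Illustrative sketch only, not part of the patch.
#include <cassert>
#include <cstdint>
#include <cstdio>

// Simplified mirror of the ImageDimIntrinsicInfo struct from AMDGPUInstrInfo.h.
struct ImageDimIntrinsicInfoSketch {
  uint8_t NumGradients, NumDmask, NumData, NumVAddrs, NumArgs;
  uint8_t DMaskIndex, VAddrStart, GradientStart, CoordStart;
  uint8_t LodIndex, MipIndex, VAddrEnd, RsrcIndex, SampIndex;
  uint8_t UnormIndex, TexFailCtrlIndex, CachePolicyIndex;
};

int main() {
  // Hand-derived values for a 2D sample with derivatives: no data args,
  // 1 dmask, 4 gradients (2 per dimension), 2 coordinates, no lod/clamp/mip,
  // then rsrc, sampler, unorm, texfailctrl, cachepolicy.
  ImageDimIntrinsicInfoSketch Intr = {};
  Intr.NumData = 0;
  Intr.NumDmask = 1;
  Intr.NumGradients = 4;
  Intr.NumVAddrs = 6;        // gradients + coordinates
  Intr.DMaskIndex = 0;       // DmaskArgIndex = NumDataArgs
  Intr.VAddrStart = 1;       // VAddrArgIndex = DmaskArgIndex + NumDmaskArgs
  Intr.GradientStart = 1;    // no extra address args for this opcode
  Intr.CoordStart = 5;       // GradientStart + NumGradients
  Intr.LodIndex = 6;         // VAddrStart + NumVAddrs - 1 (unused here)
  Intr.MipIndex = 6;
  Intr.VAddrEnd = 7;         // VAddrStart + NumVAddrs
  Intr.RsrcIndex = 7;
  Intr.SampIndex = 8;
  Intr.UnormIndex = 9;
  Intr.TexFailCtrlIndex = 10;
  Intr.CachePolicyIndex = 11;
  Intr.NumArgs = 12;         // CachePolicyIndex + 1

  // Consistency checks mirroring the tablegen formulas.
  assert(Intr.VAddrStart == Intr.DMaskIndex + Intr.NumDmask);
  assert(Intr.CoordStart == Intr.GradientStart + Intr.NumGradients);
  assert(Intr.VAddrEnd == Intr.VAddrStart + Intr.NumVAddrs);

  // The selectors and the legalizer turn these relative indices into MIR
  // operand numbers with one offset that skips the explicit defs and the
  // intrinsic ID operand, i.e. ArgOffset = MI.getNumExplicitDefs() + 1.
  unsigned NumDefs = 1;
  unsigned ArgOffset = NumDefs + 1;
  std::printf("dmask operand:       %u\n", ArgOffset + Intr.DMaskIndex);
  std::printf("first vaddr operand: %u\n", ArgOffset + Intr.VAddrStart);
  std::printf("rsrc operand:        %u\n", ArgOffset + Intr.RsrcIndex);
  std::printf("cachepolicy operand: %u\n", ArgOffset + Intr.CachePolicyIndex);
  return 0;
}

Because every consumer now goes through the same table, the SDag path, GlobalISel, and the legalizer cannot drift apart in how they count image intrinsic operands; the indices are defined once in tablegen.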