diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index 5310c542d3569be54074d48248bb97a6ee962284..894f341bb0851b85b848f9bc250473840f08bb04 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -350,20 +350,14 @@ private: refineUsesAlignmentAndAA(GEP, A, DL, AliasScope, NoAlias); } - // Mark kernels with asm that reads the address of the allocated structure - // This is not necessary for lowering. This lets other passes, specifically - // PromoteAlloca, accurately calculate how much LDS will be used by the - // kernel after lowering. + // This ensures the variable is allocated when called functions access it. + // It also lets other passes, specifically PromoteAlloca, accurately + // calculate how much LDS will be used by the kernel after lowering. if (!F) { IRBuilder<> Builder(Ctx); - SmallPtrSet Kernels; for (Function &Func : M.functions()) { - if (Func.isDeclaration()) - continue; - - if (AMDGPU::isKernelCC(&Func) && !Kernels.contains(&Func)) { + if (!Func.isDeclaration() && AMDGPU::isKernelCC(&Func)) { markUsedByKernel(Builder, &Func, SGV); - Kernels.insert(&Func); } } }