Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL][SYCLLowerWGLocalMemoryPass] Remove implicit dependency on AlwaysInlinerPass and move to PipelineStart #16356

Open
wants to merge 13 commits into
base: sycl
Choose a base branch
from
Open
7 changes: 3 additions & 4 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1041,6 +1041,9 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
/*FP64ConvEmu=*/CodeGenOpts.FP64ConvEmu,
/*ExcludeAspects=*/{"fp64"}));
MPM.addPass(SYCLPropagateJointMatrixUsagePass());
// Allocate static local memory in SYCL kernel scope for each allocation
bader marked this conversation as resolved.
Show resolved Hide resolved
// call.
MPM.addPass(SYCLLowerWGLocalMemoryPass());
});
else if (LangOpts.SYCLIsHost && !LangOpts.SYCLESIMDBuildHostCode)
PB.registerPipelineStartEPCallback(
Expand Down Expand Up @@ -1184,10 +1187,6 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
MPM.addPass(SPIRITTAnnotationsPass());
}

// Allocate static local memory in SYCL kernel scope for each allocation
// call.
MPM.addPass(SYCLLowerWGLocalMemoryPass());

// Process properties and annotations
MPM.addPass(CompileTimePropertiesPass());

Expand Down
29 changes: 0 additions & 29 deletions clang/test/CodeGenSYCL/group-local-memory.cpp

This file was deleted.

31 changes: 20 additions & 11 deletions clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,24 @@
// SYCL device target, and can be disabled with -fno-sycl-early-optimizations.
// New pass manager doesn't print all passes tree, only module level.
//
// RUN: %clang_cc1 -O2 -fsycl-is-device -triple spir64-unknown-unknown %s -mdebug-pass Structure -emit-llvm -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-NEWPM-EARLYOPT
// CHECK-NEWPM-EARLYOPT: ConstantMergePass
// CHECK-NEWPM-EARLYOPT: SYCLMutatePrintfAddrspacePass
// RUN: %clang_cc1 -O2 -fsycl-is-device -triple spir64-unknown-unknown %s -mdebug-pass Structure -emit-llvm -o /dev/null 2>&1 | FileCheck %s
// CHECK: SYCLVirtualFunctionsAnalysisPass
// CHECK: ESIMDVerifierPass
// CHECK: SYCLConditionalCallOnDevicePass
// CHECK: SYCLPropagateAspectsUsagePass
// CHECK: SYCLPropagateJointMatrixUsagePass
// CHECK: SYCLLowerWGLocalMemoryPass
// CHECK: InferFunctionAttrsPass
// CHECK: AlwaysInlinerPass
// CHECK: ModuleInlinerWrapperPass
// CHECK: ConstantMergePass
// CHECK: SYCLMutatePrintfAddrspacePass
// CHECK: SYCLPropagateAspectsUsagePass
// CHECK: SYCLAddOptLevelAttributePass
// CHECK: CompileTimePropertiesPass
// CHECK: RecordSYCLAspectNamesPass
// CHECK: CleanupSYCLMetadataPass
//
// RUN: %clang_cc1 -O2 -fsycl-is-device -triple spir64-unknown-unknown %s -mdebug-pass Structure -emit-llvm -fno-sycl-early-optimizations -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-NEWPM-NOEARLYOPT
// CHECK-NEWPM-NOEARLYOPT-NOT: ConstantMergePass
// CHECK-NEWPM-NOEARLYOPT: SYCLMutatePrintfAddrspacePass

// Checks that the compile time properties pass is added into the compilation pipeline
//
// RUN: %clang_cc1 -O2 -fsycl-is-device -triple spir64-unknown-unknown %s -mdebug-pass Structure -emit-llvm -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-COMPTIMEPROPS
// CHECK-COMPTIMEPROPS: Running pass: CompileTimePropertiesPass on [module]
// RUN: %clang_cc1 -O2 -fsycl-is-device -triple spir64-unknown-unknown %s -mdebug-pass Structure -emit-llvm -fno-sycl-early-optimizations -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK-NOEARLYOPT
// CHECK-NOEARLYOPT-NOT: ConstantMergePass1
// CHECK-NOEARLYOPT: SYCLMutatePrintfAddrspacePass
35 changes: 31 additions & 4 deletions llvm/lib/SYCLLowerIR/LowerWGLocalMemory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@
//===----------------------------------------------------------------------===//

#include "llvm/SYCLLowerIR/LowerWGLocalMemory.h"
#include "llvm/Demangle/Demangle.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/Cloning.h"

using namespace llvm;

Expand Down Expand Up @@ -52,6 +54,32 @@ ModulePass *llvm::createSYCLLowerWGLocalMemoryLegacyPass() {
return new SYCLLowerWGLocalMemoryLegacy();
}

static bool inlineAllocateLocalMemoryFunc(Module &M) {
bader marked this conversation as resolved.
Show resolved Hide resolved
Function *ALMFunc = M.getFunction(SYCL_ALLOCLOCALMEM_CALL);
if (!ALMFunc)
return false;

auto *Caller = cast<CallInst>(*ALMFunc->user_begin())->getFunction();
if (!Caller->hasFnAttribute(Attribute::AlwaysInline)) {
// Already inlined.
return false;
}
std::string FName = llvm::demangle(Caller->getName());
if (FName.find("sycl::_V1::ext::oneapi::group_local_memory") ==
std::string::npos) {
// Already inlined.
return false;
}
for (User *U : make_early_inc_range(Caller->users())) {
auto *CI = cast<CallInst>(U);
InlineFunctionInfo IFI;
[[maybe_unused]] auto Result = InlineFunction(*CI, IFI);
assert(Result.isSuccess() && "inlining failed");
bader marked this conversation as resolved.
Show resolved Hide resolved
}
Caller->eraseFromParent();
return true;
}

// TODO: It should be checked that __sycl_allocateLocalMemory (or its source
// form - group_local_memory) does not occur:
// - in a function (other than user lambda/functor)
Expand Down Expand Up @@ -94,7 +122,6 @@ static bool allocaWGLocalMemory(Module &M) {
Function *ALMFunc = M.getFunction(SYCL_ALLOCLOCALMEM_CALL);
if (!ALMFunc)
return false;

assert(ALMFunc->isDeclaration() && "should have declaration only");

SmallVector<CallInst *, 4> DelCalls;
Expand All @@ -118,7 +145,7 @@ static bool allocaWGLocalMemory(Module &M) {

PreservedAnalyses SYCLLowerWGLocalMemoryPass::run(Module &M,
ModuleAnalysisManager &) {
if (allocaWGLocalMemory(M))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
bool Changed = inlineAllocateLocalMemoryFunc(M);
Changed |= allocaWGLocalMemory(M);
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
jsji marked this conversation as resolved.
Show resolved Hide resolved
}