Skip to content

Commit

Permalink
Merge branch 'sycl' into Testing-adding-handles-to-opencl
Browse files Browse the repository at this point in the history
  • Loading branch information
omarahmed1111 committed Nov 7, 2024
2 parents 1f5b4a4 + dd2390c commit 2af73c3
Show file tree
Hide file tree
Showing 80 changed files with 1,192 additions and 308 deletions.
1 change: 1 addition & 0 deletions clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ LANGOPT(SYCLExperimentalRangeRounding, 1, 0, "Use experimental parallel for rang
LANGOPT(SYCLEnableIntHeaderDiags, 1, 0, "Enable diagnostics that require the "
"SYCL integration header")
LANGOPT(SYCLIsNativeCPU , 1, 0, "Generate code for SYCL Native CPU")
LANGOPT(SYCLRTCMode, 1, 0, "Compile in RTC mode")

LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP")
LANGOPT(OffloadUniformBlock, 1, 0, "Assume that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)")
Expand Down
5 changes: 5 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -6877,6 +6877,11 @@ defm sycl_esimd_force_stateless_mem : BoolFOption<"sycl-esimd-force-stateless-me
NegFlag<SetFalse, [], [ClangOption, CLOption], "Do not enforce using "
"stateless memory accesses.">,
BothFlags<[], [ClangOption, CLOption, CC1Option], "">>;
// -fsycl-rtc-mode / -fno-sycl-rtc-mode: boolean flag pair bound to the
// SYCLRTCMode language option, off by default. Both spellings are visible to
// the clang driver and to cc1, and both are hidden from the help output.
// The help text is assembled from the per-flag prefix ("Enable" / "Disable")
// plus the shared suffix " RTC mode in SYCL.".
// NOTE(review): "RTC" presumably stands for runtime compilation - confirm and
// consider spelling it out in the help string.
defm sycl_rtc_mode: BoolFOption<"sycl-rtc-mode",
LangOpts<"SYCLRTCMode">, DefaultFalse,
PosFlag<SetTrue, [], [ClangOption], "Enable">,
NegFlag<SetFalse, [], [ClangOption], "Disable">,
BothFlags<[HelpHidden], [ClangOption, CC1Option], " RTC mode in SYCL.">>;
// TODO: Remove this option once ESIMD headers are updated to
// guard vectors to be device only.
def fno_sycl_esimd_build_host_code : Flag<["-"], "fno-sycl-esimd-build-host-code">,
Expand Down
60 changes: 39 additions & 21 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5589,6 +5589,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Args.AddLastArg(CmdArgs, options::OPT_fsycl_decompose_functor,
options::OPT_fno_sycl_decompose_functor);

Args.AddLastArg(CmdArgs, options::OPT_fsycl_rtc_mode,
options::OPT_fno_sycl_rtc_mode);

// Forward -fsycl-instrument-device-code option to cc1. This option will
// only be used for SPIR/SPIR-V based targets.
if (Triple.isSPIROrSPIRV())
Expand Down Expand Up @@ -10350,33 +10353,48 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA,
assert(JA.getInputs().size() == Inputs.size() &&
"Not have inputs for all dependence actions??");

// For FPGA, we wrap the host objects before archiving them when using
// -fsycl-link. This allows for better extraction control from the
// archive when we need the host objects for subsequent compilations.
if (OffloadingKind == Action::OFK_None &&
C.getArgs().hasArg(options::OPT_fintelfpga) &&
C.getArgs().hasArg(options::OPT_fsycl_link_EQ)) {

// Add offload targets and inputs.
CmdArgs.push_back(C.getArgs().MakeArgString(
Twine("-kind=") + Action::GetOffloadKindName(OffloadingKind)));
CmdArgs.push_back(
TCArgs.MakeArgString(Twine("-target=") + Triple.getTriple()));
// For FPGA, we wrap the host objects before archiving them when using
// -fsycl-link. This allows for better extraction control from the
// archive when we need the host objects for subsequent compilations.
if (C.getArgs().hasArg(options::OPT_fintelfpga)) {

if (Inputs[0].getType() == types::TY_Tempfiletable ||
Inputs[0].getType() == types::TY_Tempfilelist)
// Input files are passed via the batch job file table.
CmdArgs.push_back(C.getArgs().MakeArgString("-batch"));
// Add offload targets and inputs.
CmdArgs.push_back(C.getArgs().MakeArgString(
Twine("-kind=") + Action::GetOffloadKindName(OffloadingKind)));
CmdArgs.push_back(
TCArgs.MakeArgString(Twine("-target=") + Triple.getTriple()));

// Add input.
assert(Inputs[0].isFilename() && "Invalid input.");
CmdArgs.push_back(TCArgs.MakeArgString(Inputs[0].getFilename()));
if (Inputs[0].getType() == types::TY_Tempfiletable ||
Inputs[0].getType() == types::TY_Tempfilelist)
// Input files are passed via the batch job file table.
CmdArgs.push_back(C.getArgs().MakeArgString("-batch"));

C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::None(),
TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())),
CmdArgs, Inputs));
return;
// Add input.
assert(Inputs[0].isFilename() && "Invalid input.");
CmdArgs.push_back(TCArgs.MakeArgString(Inputs[0].getFilename()));

C.addCommand(std::make_unique<Command>(
JA, *this, ResponseFileSupport::None(),
TCArgs.MakeArgString(getToolChain().GetProgramPath(getShortName())),
CmdArgs, Inputs));
return;
} else {
// When compiling and linking separately, we need to propagate the
// compression related CLI options to offload-wrapper. Don't propagate
// these options when wrapping objects for FPGA.
if (C.getInputArgs().getLastArg(options::OPT_offload_compress)) {
CmdArgs.push_back(
C.getArgs().MakeArgString(Twine("-offload-compress")));
// -offload-compression-level=<>
if (Arg *A = C.getInputArgs().getLastArg(
options::OPT_offload_compression_level_EQ))
CmdArgs.push_back(C.getArgs().MakeArgString(
Twine("-offload-compression-level=") + A->getValue()));
}
}
}

// Add offload targets and inputs.
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1328,6 +1328,9 @@ StringRef SYCL::gen::resolveGenDevice(StringRef DeviceName) {
.Case("amd_gpu_gfx700", "gfx700")
.Case("amd_gpu_gfx701", "gfx701")
.Case("amd_gpu_gfx702", "gfx702")
.Case("amd_gpu_gfx703", "gfx703")
.Case("amd_gpu_gfx704", "gfx704")
.Case("amd_gpu_gfx705", "gfx705")
.Case("amd_gpu_gfx801", "gfx801")
.Case("amd_gpu_gfx802", "gfx802")
.Case("amd_gpu_gfx803", "gfx803")
Expand Down Expand Up @@ -1415,6 +1418,9 @@ SmallString<64> SYCL::gen::getGenDeviceMacro(StringRef DeviceName) {
.Case("gfx700", "AMD_GPU_GFX700")
.Case("gfx701", "AMD_GPU_GFX701")
.Case("gfx702", "AMD_GPU_GFX702")
.Case("gfx703", "AMD_GPU_GFX703")
.Case("gfx704", "AMD_GPU_GFX704")
.Case("gfx705", "AMD_GPU_GFX705")
.Case("gfx801", "AMD_GPU_GFX801")
.Case("gfx802", "AMD_GPU_GFX802")
.Case("gfx803", "AMD_GPU_GFX803")
Expand Down
12 changes: 12 additions & 0 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2040,6 +2040,11 @@ class SyclKernelFieldChecker : public SyclKernelFieldHandler {
}

bool handleStructType(ParmVarDecl *PD, QualType ParamTy) final {
if (SemaSYCLRef.getLangOpts().SYCLRTCMode) {
// When compiling in RTC mode, the restriction regarding forward
// declarations doesn't apply, as we don't need the integration header.
return isValid();
}
CXXRecordDecl *RD = ParamTy->getAsCXXRecordDecl();
// For free functions all struct/class kernel arguments are forward declared
// in integration header, that adds additional restrictions for kernel
Expand Down Expand Up @@ -6453,6 +6458,13 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) {
O << "} // namespace _V1\n";
O << "} // namespace sycl\n";

// The rest of this function only applies to free-function kernels. However,
// in RTC mode, we do not need integration header information for
// free-function kernels, so we can return early here.
if (S.getLangOpts().SYCLRTCMode) {
return;
}

unsigned ShimCounter = 1;
int FreeFunctionCount = 0;
for (const KernelDesc &K : KernelDescs) {
Expand Down
80 changes: 80 additions & 0 deletions clang/test/CodeGenSYCL/free_function_int_header_rtc_mode.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -sycl-std=2020 -fsycl-rtc-mode -fsycl-int-header=%t.rtc.h %s
// RUN: FileCheck -input-file=%t.rtc.h --check-prefixes=CHECK,CHECK-RTC %s

// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown -sycl-std=2020 -fno-sycl-rtc-mode -fsycl-int-header=%t.nortc.h %s
// RUN: FileCheck -input-file=%t.nortc.h --check-prefixes=CHECK,CHECK-NORTC %s

// This test checks that free-function kernel information is included or
// excluded from the integration header, depending on the '-fsycl-rtc-mode'
// flag.

#include "sycl.hpp"

// Free-function kernel tagged with the "sycl-single-task-kernel" IR attribute.
// Writes the value `start + 66` into every element of `ptr` whose index lies
// in the half-open range [start, end); does nothing when start >= end.
// The signature is matched verbatim by the CHECK-NORTC lines in this file, so
// parameter names and types must stay as they are.
[[__sycl_detail__::add_ir_attributes_function("sycl-single-task-kernel", 2)]]
void free_function_single(int* ptr, int start, int end){
  int idx = start;
  while (idx < end) {
    ptr[idx] = start + 66;
    ++idx;
  }
}

// Free-function kernel tagged with the "sycl-nd-range-kernel" IR attribute
// (attribute value 2; the CHECK-NORTC lines expect a matching
// `is_nd_range_kernel<..., 2>` trait in the integration header).
// Fills ptr[start .. end-1] with `start + 66`; an empty or inverted range is
// a no-op. Parameter names and types are matched verbatim by the checks below.
[[__sycl_detail__::add_ir_attributes_function("sycl-nd-range-kernel", 2)]]
void free_function_nd_range(int* ptr, int start, int end){
  int pos = start;
  while (pos < end) {
    ptr[pos] = start + 66;
    ++pos;
  }
}

// Minimal kernel entry point for the test: a function template carrying the
// `sycl_kernel` attribute whose body only invokes the callable it receives.
// `KernelName` is not used in the body; main() instantiates it with
// `class Kernel_Function`, which is the name the CHECK lines expect to see in
// the generated integration header.
template<typename KernelName, typename KernelFunc>
__attribute__((sycl_kernel)) void kernel(const KernelFunc &kernelFunc){
kernelFunc();
}

// Instantiates one lambda-based kernel (named Kernel_Function) next to the two
// free-function kernels above, so the integration header contains both kinds
// of kernel entries for the RTC / non-RTC comparison.
int main(){
// Default-constructed accessor from the test's sycl.hpp; it is captured by
// copy in the lambda below.
sycl::accessor<int, 1, sycl::access::mode::read_write> accessorA;
kernel<class Kernel_Function>(
[=]() {
// Touch the captured accessor inside the kernel body.
accessorA.use();
});
return 0;
}


// CHECK: const char* const kernel_names[] = {
// CHECK-NEXT: "{{.*}}__sycl_kernel_free_function_singlePiii",
// CHECK-NEXT: "{{.*}}__sycl_kernel_free_function_nd_rangePiii",
// CHECK-NEXT: "{{.*}}Kernel_Function",


// CHECK: static constexpr const char* getName() { return "{{.*}}__sycl_kernel_free_function_singlePiii"; }
// CHECK: static constexpr const char* getName() { return "{{.*}}__sycl_kernel_free_function_nd_rangePiii"; }
// CHECK: static constexpr const char* getName() { return "{{.*}}Kernel_Function"; }

// CHECK-RTC-NOT: free_function_single
// CHECK-RTC-NOT: free_function_nd_range

// CHECK-NORTC: void free_function_single(int *ptr, int start, int end);
// CHECK-NORTC: static constexpr auto __sycl_shim[[#FIRST:]]()
// CHECK-NORTC-NEXT: return (void (*)(int *, int, int))free_function_single;

// CHECK-NORTC: struct ext::oneapi::experimental::is_kernel<__sycl_shim[[#FIRST]]()> {
// CHECK-NORTC-NEXT: static constexpr bool value = true;

// CHECK-NORTC: struct ext::oneapi::experimental::is_single_task_kernel<__sycl_shim[[#FIRST]]()> {
// CHECK-NORTC-NEXT: static constexpr bool value = true;


// CHECK-NORTC: void free_function_nd_range(int *ptr, int start, int end);
// CHECK-NORTC: static constexpr auto __sycl_shim[[#SECOND:]]() {
// CHECK-NORTC-NEXT: return (void (*)(int *, int, int))free_function_nd_range;

// CHECK-NORTC: struct ext::oneapi::experimental::is_kernel<__sycl_shim[[#SECOND]]()> {
// CHECK-NORTC-NEXT: static constexpr bool value = true;

// CHECK-NORTC: struct ext::oneapi::experimental::is_nd_range_kernel<__sycl_shim[[#SECOND]](), 2> {
// CHECK-NORTC-NEXT: static constexpr bool value = true;

// CHECK-NORTC: kernel_id ext::oneapi::experimental::get_kernel_id<__sycl_shim[[#FIRST]]()>() {
// CHECK-NORTC-NEXT: return sycl::detail::get_kernel_id_impl(std::string_view{"{{.*}}__sycl_kernel_free_function_singlePiii"});

// CHECK-NORTC: kernel_id ext::oneapi::experimental::get_kernel_id<__sycl_shim[[#SECOND]]()>() {
// CHECK-NORTC-NEXT: return sycl::detail::get_kernel_id_impl(std::string_view{"{{.*}}__sycl_kernel_free_function_nd_rangePiii"});
18 changes: 18 additions & 0 deletions clang/test/Driver/sycl-device-traits-macros-amdgcn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx702 \
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-DEVICE-TRIPLE
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx703 \
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-DEVICE-TRIPLE
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx704 \
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-DEVICE-TRIPLE
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx705 \
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-DEVICE-TRIPLE
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx801 \
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-DEVICE-TRIPLE
Expand Down Expand Up @@ -120,6 +129,15 @@
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx702 \
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-OFFLOAD-ARCH
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx703 \
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-OFFLOAD-ARCH
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx704 \
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-OFFLOAD-ARCH
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx705 \
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-OFFLOAD-ARCH
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx801 \
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
// RUN: FileCheck %s --check-prefix=CHECK-SYCL-AMDGCN-AMD-AMDHSA-OFFLOAD-ARCH
Expand Down
9 changes: 9 additions & 0 deletions clang/test/Driver/sycl-oneapi-gpu-amdgpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx702 \
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx702 -DMAC_STR=GFX702
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx703 \
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx703 -DMAC_STR=GFX703
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx704 \
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx704 -DMAC_STR=GFX704
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx705 \
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx705 -DMAC_STR=GFX705
// RUN: %clangxx -fsycl -nogpulib -fsycl-targets=amd_gpu_gfx801 \
// RUN: -fsycl-libspirv-path=%S/Inputs/SYCL/libspirv.bc -### %s 2>&1 | \
// RUN: FileCheck %s --check-prefixes=DEVICE_AMD,MACRO_AMD -DDEV_STR=gfx801 -DMAC_STR=GFX801
Expand Down
32 changes: 32 additions & 0 deletions clang/test/Driver/sycl-rtc-mode.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
///
/// Perform driver test for SYCL RTC mode.
///

/// Check that the '-fsycl-rtc-mode' is correctly forwarded to the device
/// compilation and only to the device compilation.

// RUN: %clangxx -fsycl -fsycl-rtc-mode --no-offload-new-driver %s -### 2>&1 \
// RUN: | FileCheck %s

// RUN: %clangxx -fsycl -fsycl-rtc-mode --offload-new-driver %s -### 2>&1 \
// RUN: | FileCheck %s

// CHECK: clang{{.*}} "-fsycl-is-device"
// CHECK-SAME: -fsycl-rtc-mode
// CHECK: clang{{.*}} "-fsycl-is-host"
// CHECK-NOT: -fsycl-rtc-mode


/// Check that the '-fno-sycl-rtc-mode' is correctly forwarded to the device
/// compilation and only to the device compilation.

// RUN: %clangxx -fsycl -fno-sycl-rtc-mode --no-offload-new-driver %s -### 2>&1 \
// RUN: | FileCheck %s --check-prefix=NEGATIVE

// RUN: %clangxx -fsycl -fno-sycl-rtc-mode --offload-new-driver %s -### 2>&1 \
// RUN: | FileCheck %s --check-prefix=NEGATIVE

// NEGATIVE: clang{{.*}} "-fsycl-is-device"
// NEGATIVE-SAME: -fno-sycl-rtc-mode
// NEGATIVE: clang{{.*}} "-fsycl-is-host"
// NEGATIVE-NOT: -f{{(no-)?}}sycl-rtc-mode
1 change: 0 additions & 1 deletion clang/tools/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ add_clang_subdirectory(clang-diff)
add_clang_subdirectory(clang-format)
add_clang_subdirectory(clang-fuzzer)
add_clang_subdirectory(clang-import-test)
add_clang_subdirectory(clang-nvlink-wrapper)
add_clang_subdirectory(clang-linker-wrapper)
add_clang_subdirectory(clang-nvlink-wrapper)
add_clang_subdirectory(clang-offload-packager)
Expand Down
2 changes: 2 additions & 0 deletions devops/cts_exclude_filter_L0_GPU
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ kernel_bundle
marray
# fix: https://github.com/KhronosGroup/SYCL-CTS/pull/964
accessor_legacy
# CMPLRLLVM-61839
multi_ptr
2 changes: 2 additions & 0 deletions devops/cts_exclude_filter_OCL_CPU
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,5 @@ math_builtin_api
hierarchical
# fix: https://github.com/KhronosGroup/SYCL-CTS/pull/964
accessor_legacy
# CMPLRLLVM-61839
multi_ptr
6 changes: 3 additions & 3 deletions devops/dependencies.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
},
"level_zero": {
"github_tag": "v1.18.3",
"version": "v1.18.3",
"url": "https://github.com/oneapi-src/level-zero/releases/tag/v1.18.3",
"github_tag": "v1.18.5",
"version": "v1.18.5",
"url": "https://github.com/oneapi-src/level-zero/releases/tag/v1.18.5",
"root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu"
},
"tbb": {
Expand Down
13 changes: 12 additions & 1 deletion libclc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -424,9 +424,19 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
set ( has_distinct_generic_addrspace TRUE )
if ( ARCH STREQUAL spirv OR ARCH STREQUAL spirv64 )
set( opt_flags -O3 )
list( APPEND build_flags -DCLC_SPIRV )
set( spvflags --spirv-max-version=1.1 )
set( MACRO_ARCH SPIRV32 )
if( ARCH STREQUAL spirv64 )
set( MACRO_ARCH SPIRV64 )
endif()
elseif( ARCH STREQUAL clspv OR ARCH STREQUAL clspv64 )
set( opt_flags -O3 )
list( APPEND build_flags -DCLC_CLSPV )
set( MACRO_ARCH CLSPV32 )
if( ARCH STREQUAL clspv64 )
set( MACRO_ARCH CLSPV64 )
endif()
elseif( ARCH STREQUAL nvptx OR ARCH STREQUAL nvptx64 )
set( opt_flags -O3 "--nvvm-reflect-enable=false" )
set( has_distinct_generic_addrspace FALSE )
Expand All @@ -437,6 +447,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
set( has_distinct_generic_addrspace FALSE )
else()
set( opt_flags -O3 )
set( MACRO_ARCH ${ARCH} )
endif()

# Enable SPIR-V builtin function declarations, so they don't
Expand Down Expand Up @@ -483,7 +494,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
list(APPEND build_flags -D__unix__)
endif()

string( TOUPPER "CLC_${ARCH}" CLC_TARGET_DEFINE )
string( TOUPPER "CLC_${MACRO_ARCH}" CLC_TARGET_DEFINE )

list( APPEND build_flags
-D__CLC_INTERNAL
Expand Down
4 changes: 2 additions & 2 deletions libclc/clc/include/clc/clcfunc.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@

// avoid inlines for SPIR-V related targets since we'll optimise later in the
// chain
#if defined(CLC_SPIRV) || defined(CLC_SPIRV64)
#if defined(CLC_SPIRV)
#define _CLC_DEF
#elif defined(CLC_CLSPV) || defined(CLC_CLSPV64)
#elif defined(CLC_CLSPV)
#define _CLC_DEF __attribute__((noinline)) __attribute__((clspv_libclc_builtin))
#else
#define _CLC_DEF __attribute__((always_inline))
Expand Down
Loading

0 comments on commit 2af73c3

Please sign in to comment.