From 5266c2d05a1baf84ea373896ff3e3c4f16fcd6c0 Mon Sep 17 00:00:00 2001 From: Gauthier Harnisch Date: Wed, 21 Oct 2020 14:53:20 +0200 Subject: [PATCH 1/3] [SYCL] Add support for specifying DDR banks in accessors --- clang/lib/Sema/SemaSYCL.cpp | 47 ++++++++++++++++ llvm/lib/SYCL/KernelPropGen.cpp | 51 ++++++++++++++++- llvm/lib/SYCL/PrepareSYCLOpt.cpp | 18 ++++++ .../CL/sycl/detail/property_helper.hpp | 2 +- sycl/include/CL/sycl/xilinx/fpga.hpp | 1 + .../CL/sycl/xilinx/fpga/memory_properties.hpp | 55 +++++++++++++++++++ .../CL/sycl/xilinx/fpga/opt_decorate_func.hpp | 18 +++--- .../CL/sycl/xilinx/fpga/partition_array.hpp | 4 +- .../xocc_tests/simple_tests/ddr_bank_test.cpp | 40 ++++++++++++++ .../xocc_tests/simple_tests/explicit_copy.cpp | 2 +- 10 files changed, 225 insertions(+), 13 deletions(-) create mode 100644 sycl/include/CL/sycl/xilinx/fpga/memory_properties.hpp create mode 100644 sycl/test/xocc_tests/simple_tests/ddr_bank_test.cpp diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 948f731e3abb..c54b14126d83 100755 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -1666,6 +1666,25 @@ class SyclKernelDecompMarker : public SyclKernelFieldHandler { } }; +static bool isSyclXilinxType(const QualType &Ty) { + static std::array Namespaces = {"cl", "sycl", "xilinx"}; + llvm::SmallVector CtxStack; + CtxStack.push_back(cast(Ty->getAsTagDecl())); + while (!isa(CtxStack.back()->getParent())) + CtxStack.push_back(CtxStack.back()->getParent()); + for (unsigned Idx = 0; Idx < Namespaces.size(); Idx++) { + auto *NS = dyn_cast(CtxStack.pop_back_val()); + if (!NS) + return false; + IdentifierInfo *II = NS->getIdentifier(); + if (!II) + return false; + if (!II->isStr(Namespaces[Idx])) + return false; + } + return true; +} + // A type to Create and own the FunctionDecl for the kernel. class SyclKernelDeclCreator : public SyclKernelFieldHandler { FunctionDecl *KernelDecl; @@ -1702,6 +1721,33 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { Params.push_back(NewParam); } + // Obtain an integer value stored in a template parameter of buffer_location + // property to pass it to buffer_location kernel attribute + void handleXilinxProperty(ParmVarDecl *Param, QualType PropTy, + SourceLocation Loc) { + if (!isSyclXilinxType(PropTy)) + return; + /// TODO: when D88645 lands update this code to use that instead. + ASTContext &Ctx = SemaRef.getASTContext(); + const CXXRecordDecl *RD = PropTy->getAsCXXRecordDecl(); + const CXXRecordDecl *PRD = cast(RD->getParent()); + std::string Args; + if (const auto *PropDecl = dyn_cast(RD)) { + for (auto &Arg : PropDecl->getTemplateArgs().asArray()) { + switch (Arg.getKind()) { + case TemplateArgument::Integral: + Args += "_" + std::to_string(static_cast( + Arg.getAsIntegral().getExtValue())); + break; + default: + llvm_unreachable("unimplemented"); + } + } + } + Param->addAttr(AnnotateAttr::CreateImplicit( + Ctx, "xilinx_" + PRD->getName().str() + Args)); + } + // Handle accessor properties. If any properties were found in // the accessor_property_list - add the appropriate attributes to ParmVarDecl. void handleAccessorPropertyList(ParmVarDecl *Param, @@ -1722,6 +1768,7 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { QualType PropTy = Prop->getAsType(); if (Util::isSyclBufferLocationType(PropTy)) handleBufferLocationProperty(Param, PropTy, Loc); + handleXilinxProperty(Param, PropTy, Loc); } } diff --git a/llvm/lib/SYCL/KernelPropGen.cpp b/llvm/lib/SYCL/KernelPropGen.cpp index f9ec47dc629f..df140610ac98 100644 --- a/llvm/lib/SYCL/KernelPropGen.cpp +++ b/llvm/lib/SYCL/KernelPropGen.cpp @@ -16,6 +16,11 @@ #include #include +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/SYCL/KernelPropGen.h" #include "llvm/ADT/StringRef.h" #include "llvm/IR/Function.h" @@ -50,6 +55,8 @@ struct KernelPropGen : public ModulePass { static char ID; // Pass identification, replacement for typeid + llvm::SmallDenseMap UserSpecifiedDDRBanks; + KernelPropGen() : ModulePass(ID) {} /// Test if a function is a SPIR kernel @@ -70,11 +77,50 @@ struct KernelPropGen : public ModulePass { return FileFD; } + void CollectUserSpecifiedDDRBanks(Function &F) { + constexpr StringRef Prefix = "xilinx_ddr_bank_"; + for (Instruction &I : instructions(F)) { + auto *CB = dyn_cast(&I); + if (!CB || CB->getIntrinsicID() != Intrinsic::var_annotation) + continue; + auto *Alloca = + dyn_cast_or_null(getUnderlyingObject(CB->getOperand(0))); + auto *Str = cast( + cast(getUnderlyingObject(CB->getOperand(1))) + ->getOperand(0)); + if (!Alloca) + continue; + StringRef Annot = Str->getRawDataValues(); + if (!Annot.startswith(Prefix)) + continue; + Annot = Annot.drop_front(Prefix.size()).drop_back(); + unsigned Bank = 0; + if (Annot.getAsInteger(10, Bank)) + continue; + UserSpecifiedDDRBanks[Alloca] = Bank; + } + } + + unsigned findDDRBankFor(Argument *Arg) { + for (User *U : Arg->users()) { + if (auto *Store = dyn_cast(U)) + if (Store->getValueOperand() == Arg) { + auto Lookup = UserSpecifiedDDRBanks.find(dyn_cast_or_null( + getUnderlyingObject(Store->getPointerOperand()))); + if (Lookup == UserSpecifiedDDRBanks.end()) + continue; + return Lookup->second; + } + } + return 0; + } + void GenerateXOCCPropertyScript(Module &M, llvm::raw_fd_ostream &O) { llvm::SmallString<512> kernelNames; llvm::SmallString<512> DDRArgs; for (auto &F : M.functions()) { if (isKernel(F)) { + CollectUserSpecifiedDDRBanks(F); kernelNames += (" \"" + F.getName() + "\" ").str(); for (auto& Arg : F.args()) { @@ -106,8 +152,9 @@ struct KernelPropGen : public ModulePass { // default compute unit name. If more than one CU is generated // (which we don't support yet in any case) then they would be // KernelName_2..KernelName_3 etc. - DDRArgs += ("--sp " + F.getName() + "_1." + Arg.getName() - + ":DDR[0] ").str(); + DDRArgs += ("--sp " + F.getName() + "_1." + Arg.getName() + + ":DDR[" + std::to_string(findDDRBankFor(&Arg)) + "] ") + .str(); } } O << "\n"; // line break for new set of kernel properties diff --git a/llvm/lib/SYCL/PrepareSYCLOpt.cpp b/llvm/lib/SYCL/PrepareSYCLOpt.cpp index 7412ad6264aa..072629261592 100644 --- a/llvm/lib/SYCL/PrepareSYCLOpt.cpp +++ b/llvm/lib/SYCL/PrepareSYCLOpt.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/CallingConv.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/SYCL/PrepareSYCLOpt.h" #include "llvm/Support/Casting.h" @@ -55,9 +56,26 @@ struct PrepareSYCLOpt : public ModulePass { } } + /// At this point in the pipeline Annotations intrinsic have all been + /// converted into what they need to be. But they can still be present and + /// have pointer on pointer as arguments which v++ can't deal with. + void removeAnnotationsIntrisic(Module &M) { + SmallVector ToRemove; + for (Function &F : M.functions()) + if (F.getIntrinsicID() == Intrinsic::annotation || + F.getIntrinsicID() == Intrinsic::ptr_annotation || + F.getIntrinsicID() == Intrinsic::var_annotation) + for (User *U : F.users()) + if (auto *I = dyn_cast(U)) + ToRemove.push_back(I); + for (Instruction *I : ToRemove) + I->eraseFromParent(); + } + bool runOnModule(Module &M) override { turnNonKernelsIntoPrivate(M); setCallingConventions(M); + removeAnnotationsIntrisic(M); return true; } }; diff --git a/sycl/include/CL/sycl/detail/property_helper.hpp b/sycl/include/CL/sycl/detail/property_helper.hpp index 3c8a82bc0011..6c10e560b757 100644 --- a/sycl/include/CL/sycl/detail/property_helper.hpp +++ b/sycl/include/CL/sycl/detail/property_helper.hpp @@ -40,7 +40,7 @@ enum PropWithDataKind { BufferContextBound, ImageUseMutex, ImageContextBound, - PropWithDataKindSize + PropWithDataKindSize, }; // Base class for dataless properties, needed to check that the type of an diff --git a/sycl/include/CL/sycl/xilinx/fpga.hpp b/sycl/include/CL/sycl/xilinx/fpga.hpp index a55150a10f0a..08d8b1b9e3a7 100644 --- a/sycl/include/CL/sycl/xilinx/fpga.hpp +++ b/sycl/include/CL/sycl/xilinx/fpga.hpp @@ -18,5 +18,6 @@ #include "CL/sycl/xilinx/fpga/opt_decorate_func.hpp" #include "CL/sycl/xilinx/fpga/partition_array.hpp" #include "CL/sycl/xilinx/fpga/kernel_properties.hpp" +#include "CL/sycl/xilinx/fpga/memory_properties.hpp" #endif // SYCL_XILINX_FPGA_HPP diff --git a/sycl/include/CL/sycl/xilinx/fpga/memory_properties.hpp b/sycl/include/CL/sycl/xilinx/fpga/memory_properties.hpp new file mode 100644 index 000000000000..b86954626da4 --- /dev/null +++ b/sycl/include/CL/sycl/xilinx/fpga/memory_properties.hpp @@ -0,0 +1,55 @@ +//==- memory_properties.hpp --- SYCL Xilinx memory proprerties -------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SYCL_XILINX_FPGA_MEMORY_PROPERTIES_HPP +#define SYCL_XILINX_FPGA_MEMORY_PROPERTIES_HPP + +#include "CL/sycl/detail/defines.hpp" +#include "CL/sycl/detail/property_helper.hpp" +#include "CL/sycl/properties/accessor_properties.hpp" + +__SYCL_INLINE_NAMESPACE(cl) { + +namespace sycl { +namespace xilinx { +namespace property { + +struct ddr_bank { + template struct instance { + template constexpr bool operator==(const instance &) const { + return A == B; + } + template constexpr bool operator!=(const instance &) const { + return A != B; + } + }; +}; +} // namespace property + +template +using accessor_property_list = sycl::ONEAPI::accessor_property_list; + +template inline constexpr property::ddr_bank::instance ddr_bank; + +} // namespace xilinx + +namespace ONEAPI { +template <> +struct is_compile_time_property : std::true_type {}; +} // namespace ONEAPI + +namespace detail { +template +struct IsCompileTimePropertyInstance> + : std::true_type {}; +} // namespace detail + +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) + +#endif diff --git a/sycl/include/CL/sycl/xilinx/fpga/opt_decorate_func.hpp b/sycl/include/CL/sycl/xilinx/fpga/opt_decorate_func.hpp index e3954ed22aaa..bed968bdc311 100644 --- a/sycl/include/CL/sycl/xilinx/fpga/opt_decorate_func.hpp +++ b/sycl/include/CL/sycl/xilinx/fpga/opt_decorate_func.hpp @@ -17,6 +17,7 @@ #include "CL/sycl/xilinx/fpga/ssdm_inst.hpp" #include "CL/sycl/detail/defines.hpp" +#include __SYCL_INLINE_NAMESPACE(cl) { @@ -38,9 +39,11 @@ namespace sycl::xilinx { */ template -void dataflow(T functor) { +void dataflow(T&& functor) { _ssdm_op_SpecDataflowPipeline(-1, ""); - functor(); + /// the std::forward can make a difference when the operator() is l or r value + /// specified. + std::forward(functor)(); } /** Execute loops in a pipelined manner @@ -52,12 +55,13 @@ void dataflow(T functor) { \param[in] f is a function with an innermost loop to be executed in a pipeline way. */ -template -__SYCL_DEVICE_ANNOTATE("xilinx_pipeline") __attribute__((always_inline)) -void pipeline(T functor) { - functor(); +template +__SYCL_DEVICE_ANNOTATE("xilinx_pipeline") +ALWAYS_INLINE void pipeline(T&& functor) { + /// the std::forward can make a difference when the operator() is l or r value + /// specified. + std::forward(functor)(); } - } } diff --git a/sycl/include/CL/sycl/xilinx/fpga/partition_array.hpp b/sycl/include/CL/sycl/xilinx/fpga/partition_array.hpp index 854d2441bd55..d2bab320d628 100644 --- a/sycl/include/CL/sycl/xilinx/fpga/partition_array.hpp +++ b/sycl/include/CL/sycl/xilinx/fpga/partition_array.hpp @@ -52,8 +52,8 @@ namespace partition { /// This fuction is currently empty but the LowerSYCLMetaData Pass will fill /// it with the required IR. template - __SYCL_DEVICE_ANNOTATE("xilinx_partition_array") __attribute__((always_inline)) - inline void xilinx_partition_array(Ptr, int, int, int) {} + __SYCL_DEVICE_ANNOTATE("xilinx_partition_array") + ALWAYS_INLINE inline void xilinx_partition_array(Ptr, int, int, int) {} /** Represent a cyclic partition. diff --git a/sycl/test/xocc_tests/simple_tests/ddr_bank_test.cpp b/sycl/test/xocc_tests/simple_tests/ddr_bank_test.cpp new file mode 100644 index 000000000000..09b9e37c57cd --- /dev/null +++ b/sycl/test/xocc_tests/simple_tests/ddr_bank_test.cpp @@ -0,0 +1,40 @@ +// REQUIRES: xocc + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out 2>&1 | FileCheck %s +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include + +int main() { + cl::sycl::buffer Buffer(4); + cl::sycl::queue Queue; + cl::sycl::range<1> NumOfWorkItems{Buffer.get_count()}; + + Queue.submit([&](cl::sycl::handler &cgh) { + sycl::ONEAPI::accessor_property_list PL{sycl::xilinx::ddr_bank<1>}; + // CHECK: :DDR[1] + sycl::accessor Accessor(Buffer, cgh, sycl::write_only, PL); + cgh.parallel_for( + NumOfWorkItems, [=](cl::sycl::id<1> WIid) { + Accessor[WIid] = (cl::sycl::cl_int)WIid.get(0); + }); + }); + Queue.submit([&](cl::sycl::handler &cgh) { + sycl::ONEAPI::accessor_property_list PL{sycl::xilinx::ddr_bank<3>}; + // CHECK: :DDR[3] + sycl::accessor Accessor(Buffer, cgh, sycl::write_only, PL); + cgh.parallel_for( + NumOfWorkItems, [=](cl::sycl::id<1> WIid) { + Accessor[WIid] = (cl::sycl::cl_int)WIid.get(0); + }); + }); + Queue.submit([&](cl::sycl::handler &cgh) { + sycl::ONEAPI::accessor_property_list PL{sycl::xilinx::ddr_bank<0>}; + // CHECK: :DDR[0] + sycl::accessor Accessor(Buffer, cgh, sycl::write_only, PL); + cgh.parallel_for( + NumOfWorkItems, [=](cl::sycl::id<1> WIid) { + Accessor[WIid] = (cl::sycl::cl_int)WIid.get(0); + }); + }); +} diff --git a/sycl/test/xocc_tests/simple_tests/explicit_copy.cpp b/sycl/test/xocc_tests/simple_tests/explicit_copy.cpp index 682da1a80190..10ef5141cc5f 100644 --- a/sycl/test/xocc_tests/simple_tests/explicit_copy.cpp +++ b/sycl/test/xocc_tests/simple_tests/explicit_copy.cpp @@ -1,7 +1,7 @@ // REQUIRES: xocc +// XFAIL: hw // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out - // RUN: %ACC_RUN_PLACEHOLDER %t.out /* From 24e51bda39820e90e7f832c8ea778b780d4a4df3 Mon Sep 17 00:00:00 2001 From: Gauthier Harnisch Date: Wed, 21 Oct 2020 12:19:50 +0200 Subject: [PATCH 2/3] [SYCL] Fix kernel naming for hw_emu hw_emu has higher requierments than sw_emu --- clang/lib/Sema/SemaSYCL.cpp | 40 +++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index c54b14126d83..41f31100d9f6 100755 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -749,7 +749,13 @@ static std::string computeUniqueSYCLXOCCName(StringRef Name, /// Those characters need to be used wisely to prevent name collisions. /// It is also useful to use a name that is understandable by the user, /// so we add only 8 character of hash and only if needed. + /// The first character cannot be an underscore or a digit. + /// An underscore can't be followed by an other underscore. constexpr unsigned MaxXOCCSize = 30; + /// Some transformations might make 2 kernel identifiers the same. + /// Allow adding a hash when such transformations are made to avoid possible + /// name conflict. + bool ForceHash = false; std::string Result; Result.reserve(Demangle.size()); @@ -764,24 +770,36 @@ static std::string computeUniqueSYCLXOCCName(StringRef Name, Result.push_back(c); } + // Replace first kernel character name by a 'k' to be compatible with SPIR + if ((Result.front() == '_' || isDigit(Result.front()))) { + Result.front() = 'k'; + ForceHash = true; + } + /// The name alone is guaranteed to be unique, so if fits in the size, it is /// enough. - if (Result.size() < MaxXOCCSize) + if (Result.size() < MaxXOCCSize && !ForceHash) return Result; /// 9 for 8 characters of hash and an '_'. Result.erase(0, Result.size() - (MaxXOCCSize - 9)); - /// Sadly there is only 63 valid characters in C identifiers. - /// So one of them A is repeated. This doesn't hurt entropy to much because - /// it is just 1 out of 64. - Result += '_' + llvm::SHA1::hashToString( - llvm::ArrayRef{ - reinterpret_cast(Name.data()), - Name.size()}, - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789_A"); + if ((Result.front() == '_' || isDigit(Result.front()))) + Result.front() = 'k'; + + if (Result.back() != '_') + Result.push_back('_'); + + /// Sadly there is only 63 valid characters in C identifiers and v++ doesn't + /// deal well with double underscores in identifiers. So A and B are + /// repeated. This doesn't hurt entropy too much because it is just 2 out + /// of 64. + Result += llvm::SHA1::hashToString( + llvm::ArrayRef{reinterpret_cast(Name.data()), + Name.size()}, + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789AB"); if (Result.size() > MaxXOCCSize) Result.resize(MaxXOCCSize); From 3624a9e61045f52ac3036ef6ec6952bfbcdc5b8d Mon Sep 17 00:00:00 2001 From: Gauthier Harnisch Date: Wed, 7 Oct 2020 02:11:26 -0700 Subject: [PATCH 3/3] [SYCL] ReFix edge detection. I thought it was fixed but couldn't get it working. in sw_emu edge detection failed because of array partition metadata. and in hw_emu and hw it failed because of pipeline metadata. array partition metadata don't cause any issues in hw or hw_emu so there just disabled in sw_emu. for pipeline metadata, what v++ generate and what the docs says it should generate don't match. previously we generated what v++ generate, now we generate what the doc says we should and it fixes ths issue. --- clang/lib/Frontend/InitPreprocessor.cpp | 16 +++++++- llvm/lib/SYCL/LowerSYCLMetaData.cpp | 8 ++-- llvm/lib/SYCL/PrepareSYCLOpt.cpp | 39 ++++++++++++++++++- sycl/doc/GettingStartedXilinxFPGA.md | 10 +++++ .../CL/sycl/xilinx/fpga/opt_decorate_func.hpp | 2 +- .../CL/sycl/xilinx/fpga/partition_array.hpp | 5 ++- 6 files changed, 73 insertions(+), 7 deletions(-) diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index cc09932d58b7..1d6c8ffb57b8 100755 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -1117,8 +1117,22 @@ static void InitializePredefinedMacros(const TargetInfo &TI, Builder.defineMacro("SYCL_EXTERNAL", "__attribute__((sycl_device))"); // Defines a macro that switches on SPIR intrinsics in SYCL runtime, used // by Xilinx FPGA devices for the moment - if (LangOpts.SYCLXOCCDevice) + if (LangOpts.SYCLXOCCDevice) { Builder.defineMacro("__SYCL_SPIR_DEVICE__"); + switch (TI.getTriple().getSubArch()) { + case llvm::Triple::FPGASubArch_sw_emu: + Builder.defineMacro("__SYCL_XILINX_SW_EMU_MODE__"); + break; + case llvm::Triple::FPGASubArch_hw_emu: + Builder.defineMacro("__SYCL_XILINX_HW_EMU_MODE__"); + break; + case llvm::Triple::FPGASubArch_hw: + Builder.defineMacro("__SYCL_XILINX_HW_MODE__"); + break; + default: + break; + } + } if (TI.getTriple().isNVPTX()) { Builder.defineMacro("__SYCL_NVPTX__", "1"); diff --git a/llvm/lib/SYCL/LowerSYCLMetaData.cpp b/llvm/lib/SYCL/LowerSYCLMetaData.cpp index 6b8f728a20a3..71d29b407671 100644 --- a/llvm/lib/SYCL/LowerSYCLMetaData.cpp +++ b/llvm/lib/SYCL/LowerSYCLMetaData.cpp @@ -94,11 +94,10 @@ struct LSMDState { ResultMD.push_back(MDNode::get( Ctx, {MDString::get(Ctx, "llvm.loop.pipeline.enable"), ConstantAsMetadata::get( - ConstantInt::get(Type::getInt32Ty(Ctx), 1)), + ConstantInt::get(Type::getInt32Ty(Ctx), -1)), ConstantAsMetadata::get( ConstantInt::getFalse(Type::getInt1Ty(Ctx))), - ConstantAsMetadata::get( - ConstantInt::get(Type::getInt8Ty(Ctx), -1))})); + })); MDNode *MDN = MDNode::getDistinct(Ctx, ResultMD); BB->getTerminator()->setMetadata(LLVMContext::MD_loop, MDN); BB->getTerminator() @@ -162,6 +161,9 @@ struct LowerSYCLMetaData : public ModulePass { bool runOnModule(Module &M) override { return LSMDState(M).run(); } + virtual StringRef getPassName() const override { + return "LowerSYCLMetaData"; + } }; } diff --git a/llvm/lib/SYCL/PrepareSYCLOpt.cpp b/llvm/lib/SYCL/PrepareSYCLOpt.cpp index 072629261592..518a113ec17b 100644 --- a/llvm/lib/SYCL/PrepareSYCLOpt.cpp +++ b/llvm/lib/SYCL/PrepareSYCLOpt.cpp @@ -15,6 +15,7 @@ #include #include +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Intrinsics.h" @@ -48,6 +49,8 @@ struct PrepareSYCLOpt : public ModulePass { assert(F.use_empty()); continue; } + if (F.isIntrinsic()) + continue; F.setCallingConv(CallingConv::SPIR_FUNC); for (Value* V : F.users()) { if (auto* Call = dyn_cast(V)) @@ -72,14 +75,48 @@ struct PrepareSYCLOpt : public ModulePass { I->eraseFromParent(); } + /// This will change array partition such that after the O3 pipeline it + /// matched very closely what v++ generates. + /// This will change the type of the alloca referenced by the array partition + /// into an array. and change the argument received by xlx_array_partition + /// into a pointer on an array. + void lowerArrayPartition(Module &M) { + Function* Func = Intrinsic::getDeclaration(&M, Intrinsic::sideeffect); + for (Use& U : Func->uses()) { + auto* Usr = dyn_cast(U.getUser()); + if (!Usr) + continue; + if (!Usr->getOperandBundle("xlx_array_partition")) + continue; + Use& Ptr = U.getUser()->getOperandUse(0); + Value* Obj = getUnderlyingObject(Ptr); + if (!isa(Obj)) + return; + auto* Alloca = cast(Obj); + auto *Replacement = + new AllocaInst(Ptr->getType()->getPointerElementType(), 0, + ConstantInt::get(Type::getInt32Ty(M.getContext()), 1), + Align(128), ""); + Replacement->insertAfter(Alloca); + Instruction* Cast = BitCastInst::Create( + Instruction::BitCast, Replacement, Alloca->getType()); + Cast->insertAfter(Replacement); + Alloca->replaceAllUsesWith(Cast); + Value* Zero = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0); + Instruction* GEP = GetElementPtrInst::Create(nullptr, Replacement, {Zero}); + GEP->insertAfter(Cast); + Ptr.set(GEP); + } + } + bool runOnModule(Module &M) override { turnNonKernelsIntoPrivate(M); setCallingConventions(M); + lowerArrayPartition(M); removeAnnotationsIntrisic(M); return true; } }; - } namespace llvm { diff --git a/sycl/doc/GettingStartedXilinxFPGA.md b/sycl/doc/GettingStartedXilinxFPGA.md index 1b1a99a98e89..14395364c768 100644 --- a/sycl/doc/GettingStartedXilinxFPGA.md +++ b/sycl/doc/GettingStartedXilinxFPGA.md @@ -472,6 +472,16 @@ sudo rmmod xocl sudo modprobe xocl ``` +## Xilinx Macros + +``__SYCL_XILINX_SW_EMU_MODE__`` will be defined when compiling device code in sw_emu mode + +``__SYCL_XILINX_HW_EMU_MODE__`` will be defined when compiling device code in hw_emu mode + +``__SYCL_XILINX_HW_MODE__`` will be defined when compiling device code in hw mode + +when compiling host code none of them will be defined. + ## Xilinx FPGA SYCL compiler architecture [Architecture of the Xilinx SYCL diff --git a/sycl/include/CL/sycl/xilinx/fpga/opt_decorate_func.hpp b/sycl/include/CL/sycl/xilinx/fpga/opt_decorate_func.hpp index bed968bdc311..c51c88e66292 100644 --- a/sycl/include/CL/sycl/xilinx/fpga/opt_decorate_func.hpp +++ b/sycl/include/CL/sycl/xilinx/fpga/opt_decorate_func.hpp @@ -57,7 +57,7 @@ void dataflow(T&& functor) { */ template __SYCL_DEVICE_ANNOTATE("xilinx_pipeline") -ALWAYS_INLINE void pipeline(T&& functor) { +__SYCL_ALWAYS_INLINE void pipeline(T&& functor) { /// the std::forward can make a difference when the operator() is l or r value /// specified. std::forward(functor)(); diff --git a/sycl/include/CL/sycl/xilinx/fpga/partition_array.hpp b/sycl/include/CL/sycl/xilinx/fpga/partition_array.hpp index d2bab320d628..983d0745c225 100644 --- a/sycl/include/CL/sycl/xilinx/fpga/partition_array.hpp +++ b/sycl/include/CL/sycl/xilinx/fpga/partition_array.hpp @@ -52,8 +52,11 @@ namespace partition { /// This fuction is currently empty but the LowerSYCLMetaData Pass will fill /// it with the required IR. template +#if defined(__SYCL_XILINX_HW_EMU_MODE__) || defined(__SYCL_XILINX_HW_MODE__) __SYCL_DEVICE_ANNOTATE("xilinx_partition_array") - ALWAYS_INLINE inline void xilinx_partition_array(Ptr, int, int, int) {} +#endif + __SYCL_ALWAYS_INLINE + inline void xilinx_partition_array(Ptr, int, int, int) {} /** Represent a cyclic partition.