Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix edgedetection2 #89

Merged
merged 5 commits into from
Nov 3, 2020
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion clang/lib/Frontend/InitPreprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1117,8 +1117,22 @@ static void InitializePredefinedMacros(const TargetInfo &TI,
Builder.defineMacro("SYCL_EXTERNAL", "__attribute__((sycl_device))");
// Defines a macro that switches on SPIR intrinsics in SYCL runtime, used
// by Xilinx FPGA devices for the moment
if (LangOpts.SYCLXOCCDevice)
if (LangOpts.SYCLXOCCDevice) {
Builder.defineMacro("__SYCL_SPIR_DEVICE__");
switch (TI.getTriple().getSubArch()) {
case llvm::Triple::FPGASubArch_sw_emu:
Builder.defineMacro("__SYCL_XILINX_SW_EMU_MODE__");
break;
case llvm::Triple::FPGASubArch_hw_emu:
Builder.defineMacro("__SYCL_XILINX_HW_EMU_MODE__");
break;
case llvm::Triple::FPGASubArch_hw:
Builder.defineMacro("__SYCL_XILINX_HW_MODE__");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These macros seems very useful.
They should be described somewhere in the documentation.

break;
default:
break;
}
}

if (TI.getTriple().isNVPTX()) {
Builder.defineMacro("__SYCL_NVPTX__", "1");
Expand Down
87 changes: 76 additions & 11 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,13 @@ static std::string computeUniqueSYCLXOCCName(StringRef Name,
/// Those characters need to be used wisely to prevent name collisions.
/// It is also useful to use a name that is understandable by the user,
/// so we add only 8 characters of hash, and only if needed.
/// The first character cannot be an underscore or a digit.
/// An underscore can't be followed by another underscore.
constexpr unsigned MaxXOCCSize = 30;
/// Some transformations might make 2 kernel identifiers the same.
/// Allow adding a hash when such transformations are made to avoid possible
/// name conflict.
bool ForceHash = false;

std::string Result;
Result.reserve(Demangle.size());
Expand All @@ -764,24 +770,36 @@ static std::string computeUniqueSYCLXOCCName(StringRef Name,
Result.push_back(c);
}

// Replace first kernel character name by a 'k' to be compatible with SPIR
if ((Result.front() == '_' || isDigit(Result.front()))) {
Result.front() = 'k';
ForceHash = true;
}

/// The name alone is guaranteed to be unique, so if fits in the size, it is
/// enough.
if (Result.size() < MaxXOCCSize)
if (Result.size() < MaxXOCCSize && !ForceHash)
return Result;

/// 9 for 8 characters of hash and an '_'.
Result.erase(0, Result.size() - (MaxXOCCSize - 9));

/// Sadly there is only 63 valid characters in C identifiers.
/// So one of them A is repeated. This doesn't hurt entropy to much because
/// it is just 1 out of 64.
Result += '_' + llvm::SHA1::hashToString(
llvm::ArrayRef<uint8_t>{
reinterpret_cast<const uint8_t *>(Name.data()),
Name.size()},
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789_A");
if ((Result.front() == '_' || isDigit(Result.front())))
Result.front() = 'k';

if (Result.back() != '_')
Result.push_back('_');

/// Sadly there is only 63 valid characters in C identifiers and v++ doesn't
/// deal well with double underscores in identifiers. So A and B are
/// repeated. This doesn't hurt entropy too much because it is just 2 out
/// of 64.
Result += llvm::SHA1::hashToString(
llvm::ArrayRef<uint8_t>{reinterpret_cast<const uint8_t *>(Name.data()),
Name.size()},
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789AB");

if (Result.size() > MaxXOCCSize)
Result.resize(MaxXOCCSize);
Expand Down Expand Up @@ -1666,6 +1684,25 @@ class SyclKernelDecompMarker : public SyclKernelFieldHandler {
}
};

static bool isSyclXilinxType(const QualType &Ty) {
static std::array<StringRef, 3> Namespaces = {"cl", "sycl", "xilinx"};
llvm::SmallVector<const DeclContext *, 8> CtxStack;
CtxStack.push_back(cast<DeclContext>(Ty->getAsTagDecl()));
while (!isa<TranslationUnitDecl>(CtxStack.back()->getParent()))
CtxStack.push_back(CtxStack.back()->getParent());
for (unsigned Idx = 0; Idx < Namespaces.size(); Idx++) {
auto *NS = dyn_cast<NamespaceDecl>(CtxStack.pop_back_val());
if (!NS)
return false;
IdentifierInfo *II = NS->getIdentifier();
if (!II)
return false;
if (!II->isStr(Namespaces[Idx]))
return false;
}
return true;
}

// A type to Create and own the FunctionDecl for the kernel.
class SyclKernelDeclCreator : public SyclKernelFieldHandler {
FunctionDecl *KernelDecl;
Expand Down Expand Up @@ -1702,6 +1739,33 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler {
Params.push_back(NewParam);
}

// Translate a Xilinx accessor property into an implicit annotate attribute
// on the kernel parameter.
//
// The annotation string is "xilinx_" followed by the name of the class
// enclosing the property type, followed by "_<value>" for every integral
// template argument of the property type (when it is a class template
// specialization).
//
// Nothing is done when PropTy is not a type from the cl::sycl::xilinx
// namespace, is not a C++ record, or is not nested inside a C++ record.
void handleXilinxProperty(ParmVarDecl *Param, QualType PropTy,
                          SourceLocation Loc) {
  if (!isSyclXilinxType(PropTy))
    return;
  /// TODO: when D88645 lands update this code to use that instead.
  ASTContext &Ctx = SemaRef.getASTContext();
  // A Xilinx tag type is not necessarily a class (it could be an enum).
  const CXXRecordDecl *RD = PropTy->getAsCXXRecordDecl();
  if (!RD)
    return;
  // The annotation is named after the enclosing class; a property declared
  // directly at namespace scope has none, so there is nothing to emit.
  const auto *PRD = dyn_cast<CXXRecordDecl>(RD->getParent());
  if (!PRD)
    return;
  std::string Args;
  if (const auto *PropDecl = dyn_cast<ClassTemplateSpecializationDecl>(RD)) {
    for (auto &Arg : PropDecl->getTemplateArgs().asArray()) {
      switch (Arg.getKind()) {
      case TemplateArgument::Integral:
        Args += "_" + std::to_string(static_cast<int>(
                          Arg.getAsIntegral().getExtValue()));
        break;
      default:
        // Only integral template arguments are supported so far.
        llvm_unreachable("unimplemented");
      }
    }
  }
  Param->addAttr(AnnotateAttr::CreateImplicit(
      Ctx, "xilinx_" + PRD->getName().str() + Args));
}

// Handle accessor properties. If any properties were found in
// the accessor_property_list - add the appropriate attributes to ParmVarDecl.
void handleAccessorPropertyList(ParmVarDecl *Param,
Expand All @@ -1722,6 +1786,7 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler {
QualType PropTy = Prop->getAsType();
if (Util::isSyclBufferLocationType(PropTy))
handleBufferLocationProperty(Param, PropTy, Loc);
handleXilinxProperty(Param, PropTy, Loc);
}
}

Expand Down
51 changes: 49 additions & 2 deletions llvm/lib/SYCL/KernelPropGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@
#include <regex>
#include <string>

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/SYCL/KernelPropGen.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Function.h"
Expand Down Expand Up @@ -50,6 +55,8 @@ struct KernelPropGen : public ModulePass {

static char ID; // Pass identification, replacement for typeid

llvm::SmallDenseMap<llvm::AllocaInst *, unsigned, 8> UserSpecifiedDDRBanks;

KernelPropGen() : ModulePass(ID) {}

/// Test if a function is a SPIR kernel
Expand All @@ -70,11 +77,50 @@ struct KernelPropGen : public ModulePass {
return FileFD;
}

/// Scan \p F for llvm.var.annotation calls whose annotation string has the
/// form "xilinx_ddr_bank_<N>" and record <N> as the DDR bank requested for
/// the annotated alloca in UserSpecifiedDDRBanks.
///
/// Annotations that do not target an alloca, whose string is not a constant
/// data array, that lack the prefix, or whose suffix is not a decimal number
/// are ignored.
void CollectUserSpecifiedDDRBanks(Function &F) {
  constexpr StringRef Prefix = "xilinx_ddr_bank_";
  for (Instruction &I : instructions(F)) {
    auto *CB = dyn_cast<CallBase>(&I);
    if (!CB || CB->getIntrinsicID() != Intrinsic::var_annotation)
      continue;
    auto *Alloca =
        dyn_cast_or_null<AllocaInst>(getUnderlyingObject(CB->getOperand(0)));
    if (!Alloca)
      continue;
    // The annotation string is not guaranteed to be a global with a
    // ConstantDataArray initializer (an all-zero string is a
    // ConstantAggregateZero), so check instead of asserting.
    auto *StrGV = dyn_cast_or_null<GlobalVariable>(
        getUnderlyingObject(CB->getOperand(1)));
    if (!StrGV || !StrGV->hasInitializer())
      continue;
    auto *Str = dyn_cast<ConstantDataArray>(StrGV->getInitializer());
    if (!Str)
      continue;
    StringRef Annot = Str->getRawDataValues();
    if (!Annot.startswith(Prefix))
      continue;
    Annot = Annot.drop_front(Prefix.size());
    // The raw data includes the trailing NUL of the C string; strip it.
    if (!Annot.empty() && Annot.back() == '\0')
      Annot = Annot.drop_back();
    unsigned Bank = 0;
    // getAsInteger returns true on failure.
    if (Annot.getAsInteger(10, Bank))
      continue;
    UserSpecifiedDDRBanks[Alloca] = Bank;
  }
}

/// Return the DDR bank recorded in UserSpecifiedDDRBanks for the kernel
/// argument \p Arg, or 0 when none was recorded.
///
/// The argument is matched to an alloca through the stores that write it:
/// the first store of \p Arg whose destination is based on a recorded alloca
/// decides the bank.
unsigned findDDRBankFor(Argument *Arg) {
  for (User *ArgUser : Arg->users()) {
    auto *SI = dyn_cast<StoreInst>(ArgUser);
    // Only stores that write the argument itself are of interest.
    if (!SI || SI->getValueOperand() != Arg)
      continue;
    auto *Slot = dyn_cast_or_null<AllocaInst>(
        getUnderlyingObject(SI->getPointerOperand()));
    auto Found = UserSpecifiedDDRBanks.find(Slot);
    if (Found != UserSpecifiedDDRBanks.end())
      return Found->second;
  }
  return 0;
}

void GenerateXOCCPropertyScript(Module &M, llvm::raw_fd_ostream &O) {
llvm::SmallString<512> kernelNames;
llvm::SmallString<512> DDRArgs;
for (auto &F : M.functions()) {
if (isKernel(F)) {
CollectUserSpecifiedDDRBanks(F);
kernelNames += (" \"" + F.getName() + "\" ").str();

for (auto& Arg : F.args()) {
Expand Down Expand Up @@ -106,8 +152,9 @@ struct KernelPropGen : public ModulePass {
// default compute unit name. If more than one CU is generated
// (which we don't support yet in any case) then they would be
// KernelName_2..KernelName_3 etc.
DDRArgs += ("--sp " + F.getName() + "_1." + Arg.getName()
+ ":DDR[0] ").str();
DDRArgs += ("--sp " + F.getName() + "_1." + Arg.getName() +
":DDR[" + std::to_string(findDDRBankFor(&Arg)) + "] ")
.str();
}
}
O << "\n"; // line break for new set of kernel properties
Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/SYCL/LowerSYCLMetaData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,10 @@ struct LSMDState {
ResultMD.push_back(MDNode::get(
Ctx, {MDString::get(Ctx, "llvm.loop.pipeline.enable"),
ConstantAsMetadata::get(
ConstantInt::get(Type::getInt32Ty(Ctx), 1)),
ConstantInt::get(Type::getInt32Ty(Ctx), -1)),
ConstantAsMetadata::get(
ConstantInt::getFalse(Type::getInt1Ty(Ctx))),
ConstantAsMetadata::get(
ConstantInt::get(Type::getInt8Ty(Ctx), -1))}));
}));
MDNode *MDN = MDNode::getDistinct(Ctx, ResultMD);
BB->getTerminator()->setMetadata(LLVMContext::MD_loop, MDN);
BB->getTerminator()
Expand Down Expand Up @@ -162,6 +161,9 @@ struct LowerSYCLMetaData : public ModulePass {
bool runOnModule(Module &M) override {
return LSMDState(M).run();
}
/// Human-readable name of this pass.
StringRef getPassName() const override {
  return "LowerSYCLMetaData";
}
};
}

Expand Down
57 changes: 56 additions & 1 deletion llvm/lib/SYCL/PrepareSYCLOpt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@
#include <regex>
#include <string>

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/SYCL/PrepareSYCLOpt.h"
#include "llvm/Support/Casting.h"

Expand Down Expand Up @@ -47,6 +49,8 @@ struct PrepareSYCLOpt : public ModulePass {
assert(F.use_empty());
continue;
}
if (F.isIntrinsic())
continue;
F.setCallingConv(CallingConv::SPIR_FUNC);
for (Value* V : F.users()) {
if (auto* Call = dyn_cast<CallBase>(V))
Expand All @@ -55,13 +59,64 @@ struct PrepareSYCLOpt : public ModulePass {
}
}

/// Remove every call to the llvm.annotation, llvm.ptr.annotation and
/// llvm.var.annotation intrinsics from \p M.
///
/// At this point in the pipeline the annotation intrinsics have all been
/// converted into what they need to be, but they can still be present and
/// take pointer-to-pointer arguments, which v++ can't deal with.
void removeAnnotationsIntrisic(Module &M) {
  SmallVector<CallBase *, 16> ToRemove;
  for (Function &F : M.functions()) {
    const auto IID = F.getIntrinsicID();
    if (IID != Intrinsic::annotation && IID != Intrinsic::ptr_annotation &&
        IID != Intrinsic::var_annotation)
      continue;
    // Collect first and erase afterwards: erasing while walking the use list
    // would invalidate the iteration.
    for (User *U : F.users())
      if (auto *CB = dyn_cast<CallBase>(U))
        if (CB->getCalledFunction() == &F)
          ToRemove.push_back(CB);
  }
  for (CallBase *CB : ToRemove) {
    // llvm.annotation and llvm.ptr.annotation return their first argument;
    // forward it to any remaining user so that no dangling use is left
    // behind when the call is erased.
    if (!CB->use_empty())
      CB->replaceAllUsesWith(CB->getArgOperand(0));
    CB->eraseFromParent();
  }
}

/// Rewrite the allocas referenced by "xlx_array_partition" operand bundles so
/// that, after the O3 pipeline, the IR closely matches what v++ generates.
///
/// For every call to llvm.sideeffect carrying an "xlx_array_partition" bundle
/// whose first operand is based on an alloca:
///  - a new alloca of the operand's pointee type is inserted after the old
///    one,
///  - every use of the old alloca is redirected to a bitcast of the new one,
///  - the call operand is replaced by a GEP on the new alloca, so that
///    xlx_array_partition receives a pointer to the array.
void lowerArrayPartition(Module &M) {
  Function *Func = Intrinsic::getDeclaration(&M, Intrinsic::sideeffect);
  for (Use &U : Func->uses()) {
    auto *Usr = dyn_cast<CallBase>(U.getUser());
    if (!Usr)
      continue;
    if (!Usr->getOperandBundle("xlx_array_partition"))
      continue;
    Use &Ptr = U.getUser()->getOperandUse(0);
    Value *Obj = getUnderlyingObject(Ptr);
    // Only partitions based on an alloca can be rewritten. Skip the others
    // rather than returning, so the remaining partitions are still lowered.
    auto *Alloca = dyn_cast<AllocaInst>(Obj);
    if (!Alloca)
      continue;
    auto *Replacement =
        new AllocaInst(Ptr->getType()->getPointerElementType(), 0,
                       ConstantInt::get(Type::getInt32Ty(M.getContext()), 1),
                       Align(128), "");
    Replacement->insertAfter(Alloca);
    // Existing users keep seeing a value of the old alloca's type.
    Instruction *Cast = BitCastInst::Create(Instruction::BitCast, Replacement,
                                            Alloca->getType());
    Cast->insertAfter(Replacement);
    Alloca->replaceAllUsesWith(Cast);
    Value *Zero = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
    Instruction *GEP = GetElementPtrInst::Create(nullptr, Replacement, {Zero});
    GEP->insertAfter(Cast);
    Ptr.set(GEP);
  }
}

/// Pass entry point: applies each preparation step to \p M in sequence.
/// Always returns true, i.e. the module is unconditionally reported as
/// modified.
bool runOnModule(Module &M) override {
turnNonKernelsIntoPrivate(M);
setCallingConventions(M);
// NOTE(review): array partitions are lowered before the annotation
// intrinsics are stripped; presumably the order matters - confirm.
lowerArrayPartition(M);
removeAnnotationsIntrisic(M);
return true;
}
};

}

namespace llvm {
Expand Down
10 changes: 10 additions & 0 deletions sycl/doc/GettingStartedXilinxFPGA.md
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,16 @@ sudo rmmod xocl
sudo modprobe xocl
```

## Xilinx Macros

``__SYCL_XILINX_SW_EMU_MODE__`` is defined when compiling device code in sw_emu mode.

``__SYCL_XILINX_HW_EMU_MODE__`` is defined when compiling device code in hw_emu mode.

``__SYCL_XILINX_HW_MODE__`` is defined when compiling device code in hw mode.

When compiling host code, none of these macros is defined.

## Xilinx FPGA SYCL compiler architecture

[Architecture of the Xilinx SYCL
Expand Down
2 changes: 1 addition & 1 deletion sycl/include/CL/sycl/detail/property_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ enum PropWithDataKind {
BufferContextBound,
ImageUseMutex,
ImageContextBound,
PropWithDataKindSize
PropWithDataKindSize,
};

// Base class for dataless properties, needed to check that the type of an
Expand Down
1 change: 1 addition & 0 deletions sycl/include/CL/sycl/xilinx/fpga.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@
#include "CL/sycl/xilinx/fpga/opt_decorate_func.hpp"
#include "CL/sycl/xilinx/fpga/partition_array.hpp"
#include "CL/sycl/xilinx/fpga/kernel_properties.hpp"
#include "CL/sycl/xilinx/fpga/memory_properties.hpp"

#endif // SYCL_XILINX_FPGA_HPP
Loading