Skip to content

Commit

Permalink
Replace loader handles with field at start of handle data
Browse files Browse the repository at this point in the history
This replaces the loader's handle logic: instead of wrapping raw pointers in
loader-owned handle objects, a DDI table is now stored as a field at the start
of each handle struct itself.

Just testing something...
  • Loading branch information
RossBrunton committed Jan 31, 2025
1 parent 470dfaf commit 3c26247
Show file tree
Hide file tree
Showing 81 changed files with 1,267 additions and 4,151 deletions.
17 changes: 0 additions & 17 deletions scripts/generate_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,23 +201,6 @@ def _mako_lib_cpp(path, namespace, tags, version, specs, meta):
def _mako_loader_cpp(path, namespace, tags, version, specs, meta):
print("make_loader_cpp path %s namespace %s version %s\n" %(path, namespace, version))
loc = 0
template = "ldrddi.hpp.mako"
fin = os.path.join(templates_dir, template)

name = "%s_ldrddi"%(namespace)
filename = "%s.hpp"%(name)
fout = os.path.join(path, filename)

print("Generating %s..."%fout)
loc += util.makoWrite(
fin, fout,
name=name,
ver=version,
namespace=namespace,
tags=tags,
specs=specs,
meta=meta)

template = "ldrddi.cpp.mako"
fin = os.path.join(templates_dir, template)

Expand Down
11 changes: 0 additions & 11 deletions scripts/templates/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -1295,17 +1295,6 @@ def get_initial_null_set(obj):
return 'if (nullptr != {0}) {{*{0} = nullptr;}}'.format(lvalue)
return ""

"""
Public:
returns true if the function always wraps output pointers in loader handles
"""
def always_wrap_outputs(obj):
cname = obj_traits.class_name(obj)
return (cname, obj['name']) in [
('$xProgram', 'Link'),
('$xProgram', 'LinkExp'),
]

"""
Private:
returns the list of parameters, filtering based on desc tags
Expand Down
253 changes: 22 additions & 231 deletions scripts/templates/ldrddi.cpp.mako

Large diffs are not rendered by default.

57 changes: 0 additions & 57 deletions scripts/templates/ldrddi.hpp.mako

This file was deleted.

34 changes: 29 additions & 5 deletions scripts/templates/ur_interface_loader.cpp.mako
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ from templates import helper as th
//===----------------------------------------------------------------------===//
#include <${n}_api.h>
#include <${n}_ddi.h>
#include <mutex>

#include "ur_interface_loader.hpp"

Expand Down Expand Up @@ -68,22 +69,45 @@ ${X}_APIEXPORT ${x}_result_t ${X}_APICALL ${tbl['export']['name']}(
} // extern "C"
#endif

#ifdef UR_STATIC_ADAPTER_${Adapter}
namespace ur::${adapter} {
ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) {
if (ddi == nullptr) {
namespace {
ur_result_t populateDdiTable(ur_dditable_t *ddi) {
if (ddi == nullptr) {
return UR_RESULT_ERROR_INVALID_NULL_POINTER;
}

ur_result_t result;

#ifdef UR_STATIC_ADAPTER_${Adapter}
#define NAMESPACE_ ::ur::${adapter}
#else
#define NAMESPACE_
#endif

%for tbl in th.get_pfntables(specs, meta, n, tags):
result = ${n}::${adapter}::${tbl['export']['name']}( ${X}_API_VERSION_CURRENT, &ddi->${tbl['name']} );
result = NAMESPACE_::${tbl['export']['name']}( ${X}_API_VERSION_CURRENT, &ddi->${tbl['name']} );
if (result != UR_RESULT_SUCCESS)
return result;
%endfor

#undef NAMESPACE_

return result;
}
}


namespace ur::${adapter} {
const ${x}_dditable_t *ddi_getter::value() {
static std::once_flag flag;
static ${x}_dditable_t table;

std::call_once(flag, []() { populateDdiTable(&table); });
return &table;
}

#ifdef UR_STATIC_ADAPTER_${Adapter}
ur_result_t urAdapterGetDdiTables(${x}_dditable_t *ddi) {
return populateDdiTable(ddi);
}
#endif
}
6 changes: 6 additions & 0 deletions scripts/templates/ur_interface_loader.hpp.mako
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ from templates import helper as th
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#pragma once

#include <${n}_api.h>
#include <${n}_ddi.h>

Expand All @@ -36,4 +38,8 @@ ${x}_result_t ${th.make_func_name(n, tags, obj)}(
#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO
ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi);
#endif

// Accessor for this adapter's DDI (device driver interface) dispatch table.
// value() returns a pointer to a lazily-initialized, adapter-wide table
// (populated once via std::call_once in the matching .cpp template).
// NOTE(review): presumably consumed by ur_handle_base_t_ so each handle can
// carry the dispatch table at its start — confirm against the handle header.
struct ddi_getter {
const static ${x}_dditable_t *value();
};
}
5 changes: 3 additions & 2 deletions source/adapters/cuda/adapter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#include "logger/ur_logger.hpp"
#include "tracing.hpp"

struct ur_adapter_handle_t_ {
struct ur_adapter_handle_t_ : ur_handle_t_ {
std::atomic<uint32_t> RefCount = 0;
std::mutex Mutex;
struct cuda_tracing_context_t_ *TracingCtx = nullptr;
Expand All @@ -41,7 +41,8 @@ class ur_legacy_sink : public logger::Sink {
// through UR entry points.
// https://github.com/oneapi-src/unified-runtime/issues/1330
ur_adapter_handle_t_::ur_adapter_handle_t_()
: logger(logger::get_logger("cuda",
: ur_handle_t_(),
logger(logger::get_logger("cuda",
/*default_log_level*/ logger::Level::ERR)) {

if (std::getenv("UR_LOG_CUDA") != nullptr)
Expand Down
5 changes: 5 additions & 0 deletions source/adapters/cuda/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,8 @@ void assertion(bool Condition, const char *Message = nullptr);

} // namespace ur
} // namespace detail

// CUDA adapter's DDI-table accessor: value() returns a pointer to this
// adapter's dispatch table (defined in the adapter's interface-loader TU).
struct cuda_ddi_getter {
const static ur_dditable_t *value();
};
// Common base for all CUDA adapter handle structs. Per this commit, the
// loader locates the DDI table via a field at the start of the handle data;
// ur_handle_base_t_ is declared elsewhere — assumed to provide that field.
using ur_handle_t_ = ur_handle_base_t_<cuda_ddi_getter>;
4 changes: 2 additions & 2 deletions source/adapters/cuda/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ typedef void (*ur_context_extended_deleter_t)(void *user_data);
/// if necessary.
///
///
struct ur_context_handle_t_ {
struct ur_context_handle_t_ : ur_handle_t_ {

struct deleter_data {
ur_context_extended_deleter_t Function;
Expand All @@ -87,7 +87,7 @@ struct ur_context_handle_t_ {
std::atomic_uint32_t RefCount;

ur_context_handle_t_(const ur_device_handle_t *Devs, uint32_t NumDevices)
: Devices{Devs, Devs + NumDevices}, RefCount{1} {
: ur_handle_t_(), Devices{Devs, Devs + NumDevices}, RefCount{1} {
for (auto &Dev : Devices) {
urDeviceRetain(Dev);
}
Expand Down
6 changes: 3 additions & 3 deletions source/adapters/cuda/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

#include "common.hpp"

struct ur_device_handle_t_ {
struct ur_device_handle_t_ : ur_handle_t_ {
private:
using native_type = CUdevice;

Expand All @@ -37,8 +37,8 @@ struct ur_device_handle_t_ {
public:
ur_device_handle_t_(native_type cuDevice, CUcontext cuContext, CUevent evBase,
ur_platform_handle_t platform, uint32_t DevIndex)
: CuDevice(cuDevice), CuContext(cuContext), EvBase(evBase), RefCount{1},
Platform(platform), DeviceIndex{DevIndex} {
: ur_handle_t_(), CuDevice(cuDevice), CuContext(cuContext),
EvBase(evBase), RefCount{1}, Platform(platform), DeviceIndex{DevIndex} {

UR_CHECK_ERROR(cuDeviceGetAttribute(
&MaxRegsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK,
Expand Down
13 changes: 7 additions & 6 deletions source/adapters/cuda/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type,
native_type EvEnd, native_type EvQueued,
native_type EvStart, CUstream Stream,
uint32_t StreamToken)
: CommandType{Type}, RefCount{1}, HasOwnership{true},
: ur_handle_t_(), CommandType{Type}, RefCount{1}, HasOwnership{true},
HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false},
StreamToken{StreamToken}, EventID{0}, EvEnd{EvEnd}, EvStart{EvStart},
EvQueued{EvQueued}, Queue{Queue}, Stream{Stream}, Context{Context} {
Expand All @@ -34,11 +34,12 @@ ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type,

ur_event_handle_t_::ur_event_handle_t_(ur_context_handle_t Context,
CUevent EventNative)
: CommandType{UR_COMMAND_EVENTS_WAIT}, RefCount{1}, HasOwnership{false},
HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false},
IsInterop{true}, StreamToken{std::numeric_limits<uint32_t>::max()},
EventID{0}, EvEnd{EventNative}, EvStart{nullptr}, EvQueued{nullptr},
Queue{nullptr}, Stream{nullptr}, Context{Context} {
: ur_handle_t_(), CommandType{UR_COMMAND_EVENTS_WAIT}, RefCount{1},
HasOwnership{false}, HasBeenWaitedOn{false}, IsRecorded{false},
IsStarted{false}, IsInterop{true},
StreamToken{std::numeric_limits<uint32_t>::max()}, EventID{0},
EvEnd{EventNative}, EvStart{nullptr}, EvQueued{nullptr}, Queue{nullptr},
Stream{nullptr}, Context{Context} {
urContextRetain(Context);
}

Expand Down
2 changes: 1 addition & 1 deletion source/adapters/cuda/event.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

/// UR Event mapping to CUevent
///
struct ur_event_handle_t_ {
struct ur_event_handle_t_ : ur_handle_t_ {
public:
using native_type = CUevent;

Expand Down
7 changes: 4 additions & 3 deletions source/adapters/cuda/kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
/// A compiler pass converts the UR API local memory model into the
/// CUDA shared model. This object simply calculates the total of
/// shared memory, and the initial offsets of each parameter.
struct ur_kernel_handle_t_ {
struct ur_kernel_handle_t_ : ur_handle_t_ {
using native_type = CUfunction;

native_type Function;
Expand Down Expand Up @@ -250,8 +250,9 @@ struct ur_kernel_handle_t_ {
ur_kernel_handle_t_(CUfunction Func, CUfunction FuncWithOffsetParam,
const char *Name, ur_program_handle_t Program,
ur_context_handle_t Context)
: Function{Func}, FunctionWithOffsetParam{FuncWithOffsetParam},
Name{Name}, Context{Context}, Program{Program}, RefCount{1} {
: ur_handle_t_(), Function{Func},
FunctionWithOffsetParam{FuncWithOffsetParam}, Name{Name},
Context{Context}, Program{Program}, RefCount{1} {
urProgramRetain(Program);
urContextRetain(Context);
/// Note: this code assumes that there is only one device per context
Expand Down
12 changes: 6 additions & 6 deletions source/adapters/cuda/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ struct SurfaceMem {
/// is on a different device, marked by
/// LastQueueWritingToMemObj->getDevice()
///
struct ur_mem_handle_t_ {
struct ur_mem_handle_t_ : ur_handle_t_ {
// Context where the memory object is accessible
ur_context_handle_t Context;

Expand Down Expand Up @@ -345,17 +345,17 @@ struct ur_mem_handle_t_ {
/// Constructs the UR mem handler for a non-typed allocation ("buffer")
ur_mem_handle_t_(ur_context_handle_t Ctxt, ur_mem_flags_t MemFlags,
BufferMem::AllocMode Mode, void *HostPtr, size_t Size)
: Context{Ctxt}, RefCount{1}, MemFlags{MemFlags},
: ur_handle_t_(), Context{Ctxt}, RefCount{1}, MemFlags{MemFlags},
HaveMigratedToDeviceSinceLastWrite(Context->Devices.size(), false),
Mem{std::in_place_type<BufferMem>, Ctxt, this, Mode, HostPtr, Size} {
urContextRetain(Context);
};

// Subbuffer constructor
ur_mem_handle_t_(ur_mem_handle_t Parent, size_t SubBufferOffset)
: Context{Parent->Context}, RefCount{1}, MemFlags{Parent->MemFlags},
HaveMigratedToDeviceSinceLastWrite(Parent->Context->Devices.size(),
false),
: ur_handle_t_(), Context{Parent->Context}, RefCount{1},
MemFlags{Parent->MemFlags}, HaveMigratedToDeviceSinceLastWrite(
Parent->Context->Devices.size(), false),
Mem{BufferMem{std::get<BufferMem>(Parent->Mem)}} {
auto &SubBuffer = std::get<BufferMem>(Mem);
SubBuffer.Parent = Parent;
Expand All @@ -376,7 +376,7 @@ struct ur_mem_handle_t_ {
ur_mem_handle_t_(ur_context_handle_t Ctxt, ur_mem_flags_t MemFlags,
ur_image_format_t ImageFormat, ur_image_desc_t ImageDesc,
void *HostPtr)
: Context{Ctxt}, RefCount{1}, MemFlags{MemFlags},
: ur_handle_t_(), Context{Ctxt}, RefCount{1}, MemFlags{MemFlags},
HaveMigratedToDeviceSinceLastWrite(Context->Devices.size(), false),
Mem{std::in_place_type<SurfaceMem>,
Ctxt,
Expand Down
6 changes: 3 additions & 3 deletions source/adapters/cuda/physical_mem.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
/// UR queue mapping on physical memory allocations used in virtual memory
/// management.
///
struct ur_physical_mem_handle_t_ {
struct ur_physical_mem_handle_t_ : ur_handle_t_ {
using native_type = CUmemGenericAllocationHandle;

std::atomic_uint32_t RefCount;
Expand All @@ -33,8 +33,8 @@ struct ur_physical_mem_handle_t_ {
ur_physical_mem_handle_t_(native_type PhysMem, ur_context_handle_t_ *Ctx,
ur_device_handle_t Device, size_t Size,
ur_physical_mem_properties_t Properties)
: RefCount(1), PhysicalMem(PhysMem), Context(Ctx), Device(Device),
Size(Size), Properties(Properties) {
: ur_handle_t_(), RefCount(1), PhysicalMem(PhysMem), Context(Ctx),
Device(Device), Size(Size), Properties(Properties) {
urContextRetain(Context);
urDeviceRetain(Device);
}
Expand Down
3 changes: 2 additions & 1 deletion source/adapters/cuda/platform.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,10 @@
//===----------------------------------------------------------------------===//
#pragma once

#include "common.hpp"
#include <ur/ur.hpp>
#include <vector>

struct ur_platform_handle_t_ {
/// Handle representing a CUDA platform; owns the platform's device handles.
/// Inherits ur_handle_t_ (see common.hpp) so the common loader handle data
/// precedes the adapter-specific members.
struct ur_platform_handle_t_ : ur_handle_t_ {
  // Devices enumerated on this platform; uniquely owned by the platform.
  std::vector<std::unique_ptr<ur_device_handle_t_>> Devices;
};
9 changes: 5 additions & 4 deletions source/adapters/cuda/program.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

#include "context.hpp"

struct ur_program_handle_t_ {
struct ur_program_handle_t_ : ur_handle_t_ {
using native_type = CUmodule;
native_type Module;
const char *Binary;
Expand Down Expand Up @@ -47,9 +47,10 @@ struct ur_program_handle_t_ {
ur_program_build_status_t BuildStatus = UR_PROGRAM_BUILD_STATUS_NONE;

ur_program_handle_t_(ur_context_handle_t Context, ur_device_handle_t Device)
: Module{nullptr}, Binary{}, BinarySizeInBytes{0}, RefCount{1},
Context{Context}, Device{Device}, KernelReqdWorkGroupSizeMD{},
KernelMaxWorkGroupSizeMD{}, KernelMaxLinearWorkGroupSizeMD{} {
: ur_handle_t_(), Module{nullptr}, Binary{}, BinarySizeInBytes{0},
RefCount{1}, Context{Context}, Device{Device},
KernelReqdWorkGroupSizeMD{}, KernelMaxWorkGroupSizeMD{},
KernelMaxLinearWorkGroupSizeMD{} {
urContextRetain(Context);
urDeviceRetain(Device);
}
Expand Down
Loading

0 comments on commit 3c26247

Please sign in to comment.