diff --git a/scripts/generate_code.py b/scripts/generate_code.py index cdc3dfa229..b4dfc9ead0 100644 --- a/scripts/generate_code.py +++ b/scripts/generate_code.py @@ -201,23 +201,6 @@ def _mako_lib_cpp(path, namespace, tags, version, specs, meta): def _mako_loader_cpp(path, namespace, tags, version, specs, meta): print("make_loader_cpp path %s namespace %s version %s\n" %(path, namespace, version)) loc = 0 - template = "ldrddi.hpp.mako" - fin = os.path.join(templates_dir, template) - - name = "%s_ldrddi"%(namespace) - filename = "%s.hpp"%(name) - fout = os.path.join(path, filename) - - print("Generating %s..."%fout) - loc += util.makoWrite( - fin, fout, - name=name, - ver=version, - namespace=namespace, - tags=tags, - specs=specs, - meta=meta) - template = "ldrddi.cpp.mako" fin = os.path.join(templates_dir, template) diff --git a/scripts/templates/helper.py b/scripts/templates/helper.py index 8b3d225219..1fad6eea6e 100644 --- a/scripts/templates/helper.py +++ b/scripts/templates/helper.py @@ -1295,17 +1295,6 @@ def get_initial_null_set(obj): return 'if (nullptr != {0}) {{*{0} = nullptr;}}'.format(lvalue) return "" -""" -Public: - returns true if the function always wraps output pointers in loader handles -""" -def always_wrap_outputs(obj): - cname = obj_traits.class_name(obj) - return (cname, obj['name']) in [ - ('$xProgram', 'Link'), - ('$xProgram', 'LinkExp'), - ] - """ Private: returns the list of parameters, filtering based on desc tags diff --git a/scripts/templates/ldrddi.cpp.mako b/scripts/templates/ldrddi.cpp.mako index ba191a9ceb..6b9d99e91d 100644 --- a/scripts/templates/ldrddi.cpp.mako +++ b/scripts/templates/ldrddi.cpp.mako @@ -33,24 +33,6 @@ namespace ur_loader else: func_basename = func_name %> - %if func_basename == "EventSetCallback": - namespace { - struct event_callback_wrapper_data_t { - ${x}_event_callback_t fn; - ${x}_event_handle_t event; - void *userData; - }; - - void event_callback_wrapper([[maybe_unused]] ${x}_event_handle_t hEvent, - 
${x}_execution_info_t execStatus, void *pUserData) { - auto *wrapper = - reinterpret_cast(pUserData); - (wrapper->fn)(wrapper->event, execStatus, wrapper->userData); - delete wrapper; - } - } - - %endif /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for ${func_name} %if 'condition' in obj: @@ -62,9 +44,8 @@ namespace ur_loader %endfor ) { - ${x}_result_t result = ${X}_RESULT_SUCCESS;<% - add_local = False - %>${th.get_initial_null_set(obj)} + ${x}_result_t result = ${X}_RESULT_SUCCESS; + ${th.get_initial_null_set(obj)} [[maybe_unused]] auto context = getContext(); %if func_basename == "AdapterGet": @@ -76,18 +57,7 @@ namespace ur_loader { if(platform.initStatus != ${X}_RESULT_SUCCESS) continue; - platform.dditable.${n}.${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}( 1, &${obj['params'][1]['name']}[adapterIndex], nullptr ); - try - { - ${obj['params'][1]['name']}[adapterIndex] = reinterpret_cast<${n}_adapter_handle_t>(context->factories.${n}_adapter_factory.getInstance( - ${obj['params'][1]['name']}[adapterIndex], &platform.dditable - )); - } - catch( std::bad_alloc &) - { - result = ${X}_RESULT_ERROR_OUT_OF_HOST_MEMORY; - break; - } + platform.dditable.${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}( 1, &${obj['params'][1]['name']}[adapterIndex], nullptr ); adapterIndex++; if (adapterIndex == NumEntries) { break; @@ -106,15 +76,15 @@ namespace ur_loader for( uint32_t adapter_index = 0; adapter_index < ${obj['params'][1]['name']}; adapter_index++) { // extract adapter's function pointer table - auto dditable = - reinterpret_cast<${n}_platform_object_t *>( ${obj['params'][0]['name']}[adapter_index])->dditable; + auto *dditable = + *reinterpret_cast<${n}_dditable_t **>( ${obj['params'][0]['name']}[adapter_index]); if( ( 0 < ${obj['params'][2]['name']} ) && ( ${obj['params'][2]['name']} == total_platform_handle_count)) break; uint32_t library_platform_handle_count 
= 0; - result = dditable->${n}.${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}( &${obj['params'][0]['name']}[adapter_index], 1, 0, nullptr, &library_platform_handle_count ); + result = dditable->${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}( &${obj['params'][0]['name']}[adapter_index], 1, 0, nullptr, &library_platform_handle_count ); if( ${X}_RESULT_SUCCESS != result ) break; if( nullptr != ${obj['params'][3]['name']} && ${obj['params'][2]['name']} !=0) @@ -122,21 +92,8 @@ namespace ur_loader if( total_platform_handle_count + library_platform_handle_count > ${obj['params'][2]['name']}) { library_platform_handle_count = ${obj['params'][2]['name']} - total_platform_handle_count; } - result = dditable->${n}.${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}( &${obj['params'][0]['name']}[adapter_index], 1, library_platform_handle_count, &${obj['params'][3]['name']}[ total_platform_handle_count ], nullptr ); + result = dditable->${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}( &${obj['params'][0]['name']}[adapter_index], 1, library_platform_handle_count, &${obj['params'][3]['name']}[ total_platform_handle_count ], nullptr ); if( ${X}_RESULT_SUCCESS != result ) break; - - try - { - for( uint32_t i = 0; i < library_platform_handle_count; ++i ) { - uint32_t platform_index = total_platform_handle_count + i; - ${obj['params'][3]['name']}[ platform_index ] = reinterpret_cast<${n}_platform_handle_t>( - context->factories.${n}_platform_factory.getInstance( ${obj['params'][3]['name']}[ platform_index ], dditable ) ); - } - } - catch( std::bad_alloc& ) - { - result = ${X}_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } } total_platform_handle_count += library_platform_handle_count; @@ -146,49 +103,25 @@ namespace ur_loader *${obj['params'][4]['name']} = total_platform_handle_count; %else: - <%param_replacements={}%> - %for i, item in enumerate(th.get_loader_prologue(n, tags, obj, meta)): - %if not 
'_native_object_' in item['obj']: - // extract platform's function pointer table - auto dditable = reinterpret_cast<${item['obj']}*>( ${item['pointer']}${item['name']} )->dditable; - auto ${th.make_pfn_name(n, tags, obj)} = dditable->${n}.${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}; - if( nullptr == ${th.make_pfn_name(n, tags, obj)} ) - return ${X}_RESULT_ERROR_UNINITIALIZED; - - <%break%> - %endif - %endfor - %if func_basename == "EventSetCallback": - - // Replace the callback with a wrapper function that gives the callback the loader event rather than a - // backend-specific event - auto *wrapper_data = - new event_callback_wrapper_data_t{pfnNotify, hEvent, pUserData}; - pUserData = wrapper_data; - pfnNotify = event_callback_wrapper; - - %endif + <% + ddi_generated=False + %> %for i, item in enumerate(th.get_loader_prologue(n, tags, obj, meta)): + %if not ddi_generated and ('optional' not in item or not item['optional']) and not '_native_object_' in item['obj']: %if 'range' in item: - <% - add_local = True - param_replacements[item['name']] = item['name'] + 'Local.data()'%>// convert loader handles to platform handles - auto ${item['name']}Local = std::vector<${item['type']}>(${item['range'][1]}); - for( size_t i = ${item['range'][0]}; i < ${item['range'][1]}; ++i ) - ${item['name']}Local[ i ] = reinterpret_cast<${item['obj']}*>( ${item['name']}[ i ] )->handle; - %else: - %if not '_native_object_' in item['obj']: - // convert loader handle to platform handle - %if item['optional']: - ${item['name']} = ( ${item['name']} ) ? 
reinterpret_cast<${item['obj']}*>( ${item['name']} )->handle : nullptr; + auto *dditable = *reinterpret_cast<${x}_dditable_t **>(${item['name']}[ 0 ]); %else: - ${item['name']} = reinterpret_cast<${item['obj']}*>( ${item['name']} )->handle; - %endif + auto *dditable = *reinterpret_cast<${x}_dditable_t **>(${item['name']}); %endif + <%break%> %endif %endfor + auto *${th.make_pfn_name(n, tags, obj)} = dditable->${th.get_table_name(n, tags, obj)}.${th.make_pfn_name(n, tags, obj)}; + if( nullptr == ${th.make_pfn_name(n, tags, obj)} ) + return ${X}_RESULT_ERROR_UNINITIALIZED; + <% epilogue = th.get_loader_epilogue(specs, n, tags, obj, meta) has_typename = False @@ -205,99 +138,11 @@ namespace ur_loader pPropSizeRet = &sizeret; %endif - ## Here we deal with handles buried inside struct type parameters. First - ## we create a local copy of the struct, then we convert all the handles - ## in that local copy and set the parameter to point to it before forwarding - ## it to the final API call. - <% handle_structs = th.get_object_handle_structs_to_convert(n, tags, obj, meta) %> - %if handle_structs: - // Deal with any struct parameters that have handle members we need to convert. - %for struct in handle_structs: - %if struct['optional']: - ${struct['type']} ${struct['name']}Local = {}; - if(${struct['name']}) - ${struct['name']}Local = *${struct['name']}; - %else: - auto ${struct['name']}Local = *${struct['name']}; - %endif - %endfor - - %for struct in handle_structs: - %for member in struct['members']: - ## If this member has a handle_members field that means it's a range of - ## structs which each contain a handle to convert. 
- %if 'handle_members' in member: - ## we use the parent info stripped of derefs for a unique variable name - <% - parent_no_deref = th.strip_deref(member['parent']) - range_vector_name = struct['name'] + parent_no_deref + member['name'] - ## we need to check if range bounds are literals or variables: variables - ## need the full reference chain prepended to them - range_start = member['range_start'] - if not re.match(r"[0-9]+$", range_start): - range_start = struct['name'] + "->" + member['parent'] + range_start - range_end = member['range_end'] - if not re.match(r"[0-9]+$", range_end): - range_end = struct['name'] + "->" + member['parent'] + range_end %> - std::vector<${member['type']}> ${range_vector_name}; - for(uint32_t i = ${range_start}; i < ${range_end}; i++) { - ${member['type']} NewRangeStruct = ${struct['name']}Local.${member['parent']}${member['name']}[i]; - %for handle_member in member['handle_members']: - %if handle_member['optional']: - if(NewRangeStruct.${handle_member['parent']}${handle_member['name']}) - %endif - NewRangeStruct.${handle_member['parent']}${handle_member['name']} = - reinterpret_cast<${handle_member['obj_name']}*>( - NewRangeStruct.${handle_member['parent']}${handle_member['name']}) - ->handle; - %endfor - - ${range_vector_name}.push_back(NewRangeStruct); - } - ${struct['name']}Local.${member['parent']}${member['name']} = ${range_vector_name}.data(); - ## If the member has range_start then its a range of handles - %elif 'range_start' in member: - ## we use the parent info stripped of derefs for a unique variable name - <% - parent_no_deref = th.strip_deref(member['parent']) - range_vector_name = struct['name'] + parent_no_deref + member['name'] %> - std::vector<${member['type']}> ${range_vector_name}; - for(uint32_t i = 0;i < ${struct['name']}->${member['parent']}${member['range_end']};i++) { - 
${range_vector_name}.push_back(reinterpret_cast<${member['obj_name']}*>(${struct['name']}->${member['parent']}${member['name']}[i])->handle); - } - ${struct['name']}Local.${member['parent']}${member['name']} = ${range_vector_name}.data(); - %else: - %if member['optional']: - if(${struct['name']}Local.${member['parent']}${member['name']}) - %endif - ${struct['name']}Local.${member['parent']}${member['name']} = - reinterpret_cast<${member['obj_name']}*>( - ${struct['name']}Local.${member['parent']}${member['name']})->handle; - %endif - %endfor - %endfor - - // Now that we've converted all the members update the param pointers - %for struct in handle_structs: - %if struct['optional']: - if(${struct['name']}) - %endif - ${struct['name']} = &${struct['name']}Local; - %endfor - %endif - // forward to device-platform - %if add_local: - result = ${th.make_pfn_name(n, tags, obj)}( ${", ".join(th.make_param_lines(n, tags, obj, format=["name", "local"], replacements=param_replacements))} ); - %else: result = ${th.make_pfn_name(n, tags, obj)}( ${", ".join(th.make_param_lines(n, tags, obj, format=["name"]))} ); - %endif -<% - del param_replacements - del add_local - %> + %for i, item in enumerate(epilogue): - %if 0 == i and not item['release'] and not item['retain'] and not th.always_wrap_outputs(obj): + %if 0 == i and not item['release'] and not item['retain']: ## TODO: Remove once we have a concrete way for submitting warnings in place. %if re.match(r"Enqueue\w+", func_basename): // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any output handles below. 
@@ -309,60 +154,6 @@ namespace ur_loader %endif %endif - ## Possibly handle release/retain ref counting - there are no ur_exp-image factories - %if 'factory' in item and '_exp_image_' not in item['factory']: - %if item['release']: - // release loader handle - context->factories.${item['factory']}.release( ${item['name']} ); - %endif - %if item['retain']: - // increment refcount of handle - context->factories.${item['factory']}.retain( ${item['name']} ); - %endif - %endif - %if not item['release'] and not item['retain'] and not '_native_object_' in item['obj'] or th.make_func_name(n, tags, obj) == 'urPlatformCreateWithNativeHandle': - try - { - %if 'typename' in item: - if (${item['name']} != nullptr) { - switch (${item['typename']}) { - %for etor in item['etors']: - case ${etor['name']}: { - ${etor['type']} *handles = reinterpret_cast<${etor['type']} *>(${item['name']}); - size_t nelements = *pPropSizeRet / sizeof(${etor['type']}); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast<${etor['type']}>( - context->factories.${etor['factory']}.getInstance( handles[i], dditable ) ); - } - } - } break; - %endfor - default: {} break; - } - } - %elif 'range' in item: - // convert platform handles to loader handles - for( size_t i = ${item['range'][0]}; ( nullptr != ${item['name']} ) && ( i < ${item['range'][1]} ); ++i ) - ${item['name']}[ i ] = reinterpret_cast<${item['type']}>( - context->factories.${item['factory']}.getInstance( ${item['name']}[ i ], dditable ) ); - %else: - // convert platform handle to loader handle - %if item['optional'] or th.always_wrap_outputs(obj): - if( nullptr != ${item['name']} ) - *${item['name']} = reinterpret_cast<${item['type']}>( - context->factories.${item['factory']}.getInstance( *${item['name']}, dditable ) ); - %else: - *${item['name']} = reinterpret_cast<${item['type']}>( - context->factories.${item['factory']}.getInstance( *${item['name']}, dditable ) ); - %endif - %endif - } - 
catch( std::bad_alloc& ) - { - result = ${X}_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - %endif %endfor %endif @@ -417,7 +208,7 @@ ${tbl['export']['name']}( ur_loader::LibLoader::getFunctionPtr(platform.handle.get(), "${tbl['export']['name']}")); if(!getTable) continue; - platform.initStatus = getTable( version, &platform.dditable.${n}.${tbl['name']}); + platform.initStatus = getTable( version, &platform.dditable.${tbl['name']}); } if( ${X}_RESULT_SUCCESS == result ) @@ -440,7 +231,7 @@ ${tbl['export']['name']}( else { // return pointers directly to platform's DDIs - *pDdiTable = ur_loader::getContext()->platforms.front().dditable.${n}.${tbl['name']}; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.${tbl['name']}; } } diff --git a/scripts/templates/ldrddi.hpp.mako b/scripts/templates/ldrddi.hpp.mako deleted file mode 100644 index 6502504af2..0000000000 --- a/scripts/templates/ldrddi.hpp.mako +++ /dev/null @@ -1,57 +0,0 @@ -<%! -import re -from templates import helper as th -%><% - n=namespace - N=n.upper() - - x=tags['$x'] - X=x.upper() -%>/* - * - * Copyright (C) 2022-2023 Intel Corporation - * - * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM - * Exceptions. 
- * See LICENSE.TXT - * - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - * @file ${name}.hpp - * - */ -#ifndef UR_LOADER_LDRDDI_H -#define UR_LOADER_LDRDDI_H 1 - -#include "${x}_object.hpp" -#include "${x}_singleton.hpp" - -namespace ur_loader -{ - /////////////////////////////////////////////////////////////////////////////// - <% - factories = [] - %> - %for obj in th.get_adapter_handles(specs): - %if 'class' in obj: - <% - _handle_t = th.subt(n, tags, obj['name']) - _object_t = re.sub(r"(\w+)_handle_t", r"\1_object_t", _handle_t) - _factory_t = re.sub(r"(\w+)_handle_t", r"\1_factory_t", _handle_t) - _factory = re.sub(r"(\w+)_handle_t", r"\1_factory", _handle_t) - factories.append((_factory_t, _factory)) - %>using ${th.append_ws(_object_t, 35)} = object_t < ${_handle_t} >; - using ${th.append_ws(_factory_t, 35)} = singleton_factory_t < ${_object_t}, ${_handle_t} >; - - %endif - %endfor - - struct handle_factories { - %for (f_t, f) in factories: - ${f_t} ${f}; - %endfor - }; - -} - -#endif /* UR_LOADER_LDRDDI_H */ diff --git a/scripts/templates/ur_interface_loader.cpp.mako b/scripts/templates/ur_interface_loader.cpp.mako index 125460be78..84a83d7d70 100644 --- a/scripts/templates/ur_interface_loader.cpp.mako +++ b/scripts/templates/ur_interface_loader.cpp.mako @@ -20,6 +20,7 @@ from templates import helper as th //===----------------------------------------------------------------------===// #include <${n}_api.h> #include <${n}_ddi.h> +#include #include "ur_interface_loader.hpp" @@ -68,22 +69,45 @@ ${X}_APIEXPORT ${x}_result_t ${X}_APICALL ${tbl['export']['name']}( } // extern "C" #endif -#ifdef UR_STATIC_ADAPTER_${Adapter} -namespace ur::${adapter} { -ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) { - if (ddi == nullptr) { +namespace { +ur_result_t populateDdiTable(ur_dditable_t *ddi) { + if (ddi == nullptr) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } ur_result_t result; +#ifdef UR_STATIC_ADAPTER_${Adapter} +#define NAMESPACE_ 
::ur::${adapter} +#else +#define NAMESPACE_ +#endif + %for tbl in th.get_pfntables(specs, meta, n, tags): - result = ${n}::${adapter}::${tbl['export']['name']}( ${X}_API_VERSION_CURRENT, &ddi->${tbl['name']} ); + result = NAMESPACE_::${tbl['export']['name']}( ${X}_API_VERSION_CURRENT, &ddi->${tbl['name']} ); if (result != UR_RESULT_SUCCESS) return result; %endfor +#undef NAMESPACE_ + return result; } } + + +namespace ur::${adapter} { +${x}_dditable_t *ddi_getter::value() { + static std::once_flag flag; + static ${x}_dditable_t table; + + std::call_once(flag, []() { populateDdiTable(&table); }); + return &table; +} + +#ifdef UR_STATIC_ADAPTER_${Adapter} +ur_result_t urAdapterGetDdiTables(${x}_dditable_t *ddi) { + return populateDdiTable(ddi); +} #endif +} diff --git a/scripts/templates/ur_interface_loader.hpp.mako b/scripts/templates/ur_interface_loader.hpp.mako index 48bcec4794..043aa345e5 100644 --- a/scripts/templates/ur_interface_loader.hpp.mako +++ b/scripts/templates/ur_interface_loader.hpp.mako @@ -18,6 +18,8 @@ from templates import helper as th // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +#pragma once + #include <${n}_api.h> #include <${n}_ddi.h> @@ -36,4 +38,8 @@ ${x}_result_t ${th.make_func_name(n, tags, obj)}( #ifdef UR_STATIC_ADAPTER_LEVEL_ZERO ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi); #endif + +struct ddi_getter { + static ${x}_dditable_t *value(); +}; } diff --git a/source/adapters/cuda/adapter.cpp b/source/adapters/cuda/adapter.cpp index 3ea896bbd6..b9d755d7cd 100644 --- a/source/adapters/cuda/adapter.cpp +++ b/source/adapters/cuda/adapter.cpp @@ -14,7 +14,7 @@ #include "logger/ur_logger.hpp" #include "tracing.hpp" -struct ur_adapter_handle_t_ { +struct ur_adapter_handle_t_ : ur_object_t_ { std::atomic RefCount = 0; std::mutex Mutex; struct cuda_tracing_context_t_ *TracingCtx = nullptr; @@ -41,7 +41,8 @@ class ur_legacy_sink : public 
logger::Sink { // through UR entry points. // https://github.com/oneapi-src/unified-runtime/issues/1330 ur_adapter_handle_t_::ur_adapter_handle_t_() - : logger(logger::get_logger("cuda", + : ur_object_t(), + logger(logger::get_logger("cuda", /*default_log_level*/ logger::Level::ERR)) { if (std::getenv("UR_LOG_CUDA") != nullptr) diff --git a/source/adapters/cuda/common.hpp b/source/adapters/cuda/common.hpp index 67223c45bc..af9b366e3d 100644 --- a/source/adapters/cuda/common.hpp +++ b/source/adapters/cuda/common.hpp @@ -59,3 +59,8 @@ void assertion(bool Condition, const char *Message = nullptr); } // namespace ur } // namespace detail + +struct cuda_ddi_getter { + static ur_dditable_t *value(); +}; +using ur_object_t_ = ur_handle_base_t_; diff --git a/source/adapters/cuda/context.hpp b/source/adapters/cuda/context.hpp index a10e8e9ca7..0d01542c7f 100644 --- a/source/adapters/cuda/context.hpp +++ b/source/adapters/cuda/context.hpp @@ -74,7 +74,7 @@ typedef void (*ur_context_extended_deleter_t)(void *user_data); /// if necessary. 
/// /// -struct ur_context_handle_t_ { +struct ur_context_handle_t_ : ur_object_t_ { struct deleter_data { ur_context_extended_deleter_t Function; @@ -87,7 +87,7 @@ struct ur_context_handle_t_ { std::atomic_uint32_t RefCount; ur_context_handle_t_(const ur_device_handle_t *Devs, uint32_t NumDevices) - : Devices{Devs, Devs + NumDevices}, RefCount{1} { + : ur_object_t_(), Devices{Devs, Devs + NumDevices}, RefCount{1} { for (auto &Dev : Devices) { urDeviceRetain(Dev); } diff --git a/source/adapters/cuda/device.hpp b/source/adapters/cuda/device.hpp index d9f6310103..c5dd9e18f2 100644 --- a/source/adapters/cuda/device.hpp +++ b/source/adapters/cuda/device.hpp @@ -13,7 +13,7 @@ #include "common.hpp" -struct ur_device_handle_t_ { +struct ur_device_handle_t_ : ur_object_t_ { private: using native_type = CUdevice; @@ -37,8 +37,8 @@ struct ur_device_handle_t_ { public: ur_device_handle_t_(native_type cuDevice, CUcontext cuContext, CUevent evBase, ur_platform_handle_t platform, uint32_t DevIndex) - : CuDevice(cuDevice), CuContext(cuContext), EvBase(evBase), RefCount{1}, - Platform(platform), DeviceIndex{DevIndex} { + : ur_object_t_(), CuDevice(cuDevice), CuContext(cuContext), + EvBase(evBase), RefCount{1}, Platform(platform), DeviceIndex{DevIndex} { UR_CHECK_ERROR(cuDeviceGetAttribute( &MaxRegsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, diff --git a/source/adapters/cuda/event.cpp b/source/adapters/cuda/event.cpp index a6c2208e8f..fcb1bfff30 100644 --- a/source/adapters/cuda/event.cpp +++ b/source/adapters/cuda/event.cpp @@ -24,7 +24,7 @@ ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type, native_type EvEnd, native_type EvQueued, native_type EvStart, CUstream Stream, uint32_t StreamToken) - : CommandType{Type}, RefCount{1}, HasOwnership{true}, + : ur_object_t(), CommandType{Type}, RefCount{1}, HasOwnership{true}, HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false}, StreamToken{StreamToken}, EventID{0}, EvEnd{EvEnd}, EvStart{EvStart}, EvQueued{EvQueued}, 
Queue{Queue}, Stream{Stream}, Context{Context} { @@ -34,11 +34,12 @@ ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type, ur_event_handle_t_::ur_event_handle_t_(ur_context_handle_t Context, CUevent EventNative) - : CommandType{UR_COMMAND_EVENTS_WAIT}, RefCount{1}, HasOwnership{false}, - HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false}, - IsInterop{true}, StreamToken{std::numeric_limits::max()}, - EventID{0}, EvEnd{EventNative}, EvStart{nullptr}, EvQueued{nullptr}, - Queue{nullptr}, Stream{nullptr}, Context{Context} { + : ur_object_t(), CommandType{UR_COMMAND_EVENTS_WAIT}, RefCount{1}, + HasOwnership{false}, HasBeenWaitedOn{false}, IsRecorded{false}, + IsStarted{false}, IsInterop{true}, + StreamToken{std::numeric_limits::max()}, EventID{0}, + EvEnd{EventNative}, EvStart{nullptr}, EvQueued{nullptr}, Queue{nullptr}, + Stream{nullptr}, Context{Context} { urContextRetain(Context); } diff --git a/source/adapters/cuda/event.hpp b/source/adapters/cuda/event.hpp index 10b52799a9..4d7d69cd6e 100644 --- a/source/adapters/cuda/event.hpp +++ b/source/adapters/cuda/event.hpp @@ -17,7 +17,7 @@ /// UR Event mapping to CUevent /// -struct ur_event_handle_t_ { +struct ur_event_handle_t_ : ur_object_t_ { public: using native_type = CUevent; diff --git a/source/adapters/cuda/kernel.hpp b/source/adapters/cuda/kernel.hpp index f299714b02..42a5cca37a 100644 --- a/source/adapters/cuda/kernel.hpp +++ b/source/adapters/cuda/kernel.hpp @@ -34,7 +34,7 @@ /// A compiler pass converts the UR API local memory model into the /// CUDA shared model. This object simply calculates the total of /// shared memory, and the initial offsets of each parameter. 
-struct ur_kernel_handle_t_ { +struct ur_kernel_handle_t_ : ur_object_t_ { using native_type = CUfunction; native_type Function; @@ -250,8 +250,9 @@ struct ur_kernel_handle_t_ { ur_kernel_handle_t_(CUfunction Func, CUfunction FuncWithOffsetParam, const char *Name, ur_program_handle_t Program, ur_context_handle_t Context) - : Function{Func}, FunctionWithOffsetParam{FuncWithOffsetParam}, - Name{Name}, Context{Context}, Program{Program}, RefCount{1} { + : ur_object_t_(), Function{Func}, + FunctionWithOffsetParam{FuncWithOffsetParam}, Name{Name}, + Context{Context}, Program{Program}, RefCount{1} { urProgramRetain(Program); urContextRetain(Context); /// Note: this code assumes that there is only one device per context diff --git a/source/adapters/cuda/memory.hpp b/source/adapters/cuda/memory.hpp index 6dcaa28414..4f62299f78 100644 --- a/source/adapters/cuda/memory.hpp +++ b/source/adapters/cuda/memory.hpp @@ -310,7 +310,7 @@ struct SurfaceMem { /// is on a different device, marked by /// LastQueueWritingToMemObj->getDevice() /// -struct ur_mem_handle_t_ { +struct ur_mem_handle_t_ : ur_object_t_ { // Context where the memory object is accessible ur_context_handle_t Context; @@ -345,7 +345,7 @@ struct ur_mem_handle_t_ { /// Constructs the UR mem handler for a non-typed allocation ("buffer") ur_mem_handle_t_(ur_context_handle_t Ctxt, ur_mem_flags_t MemFlags, BufferMem::AllocMode Mode, void *HostPtr, size_t Size) - : Context{Ctxt}, RefCount{1}, MemFlags{MemFlags}, + : ur_object_t_(), Context{Ctxt}, RefCount{1}, MemFlags{MemFlags}, HaveMigratedToDeviceSinceLastWrite(Context->Devices.size(), false), Mem{std::in_place_type, Ctxt, this, Mode, HostPtr, Size} { urContextRetain(Context); @@ -353,9 +353,9 @@ struct ur_mem_handle_t_ { // Subbuffer constructor ur_mem_handle_t_(ur_mem_handle_t Parent, size_t SubBufferOffset) - : Context{Parent->Context}, RefCount{1}, MemFlags{Parent->MemFlags}, - HaveMigratedToDeviceSinceLastWrite(Parent->Context->Devices.size(), - false), + : 
ur_object_t(), Context{Parent->Context}, RefCount{1}, + MemFlags{Parent->MemFlags}, HaveMigratedToDeviceSinceLastWrite( + Parent->Context->Devices.size(), false), Mem{BufferMem{std::get(Parent->Mem)}} { auto &SubBuffer = std::get(Mem); SubBuffer.Parent = Parent; @@ -376,7 +376,7 @@ struct ur_mem_handle_t_ { ur_mem_handle_t_(ur_context_handle_t Ctxt, ur_mem_flags_t MemFlags, ur_image_format_t ImageFormat, ur_image_desc_t ImageDesc, void *HostPtr) - : Context{Ctxt}, RefCount{1}, MemFlags{MemFlags}, + : ur_object_t_(), Context{Ctxt}, RefCount{1}, MemFlags{MemFlags}, HaveMigratedToDeviceSinceLastWrite(Context->Devices.size(), false), Mem{std::in_place_type, Ctxt, diff --git a/source/adapters/cuda/physical_mem.hpp b/source/adapters/cuda/physical_mem.hpp index 7e38e1be9e..3b8cfc42d7 100644 --- a/source/adapters/cuda/physical_mem.hpp +++ b/source/adapters/cuda/physical_mem.hpp @@ -20,7 +20,7 @@ /// UR queue mapping on physical memory allocations used in virtual memory /// management. /// -struct ur_physical_mem_handle_t_ { +struct ur_physical_mem_handle_t_ : ur_object_t_ { using native_type = CUmemGenericAllocationHandle; std::atomic_uint32_t RefCount; @@ -33,8 +33,8 @@ struct ur_physical_mem_handle_t_ { ur_physical_mem_handle_t_(native_type PhysMem, ur_context_handle_t_ *Ctx, ur_device_handle_t Device, size_t Size, ur_physical_mem_properties_t Properties) - : RefCount(1), PhysicalMem(PhysMem), Context(Ctx), Device(Device), - Size(Size), Properties(Properties) { + : ur_object_t_(), RefCount(1), PhysicalMem(PhysMem), Context(Ctx), + Device(Device), Size(Size), Properties(Properties) { urContextRetain(Context); urDeviceRetain(Device); } diff --git a/source/adapters/cuda/platform.hpp b/source/adapters/cuda/platform.hpp index 5da72057ab..3d98c5d39b 100644 --- a/source/adapters/cuda/platform.hpp +++ b/source/adapters/cuda/platform.hpp @@ -12,6 +12,6 @@ #include #include -struct ur_platform_handle_t_ { +struct ur_platform_handle_t_ : ur_object_t_ { std::vector> Devices; }; diff 
--git a/source/adapters/cuda/program.hpp b/source/adapters/cuda/program.hpp index 10998cae2c..5128b1f8b6 100644 --- a/source/adapters/cuda/program.hpp +++ b/source/adapters/cuda/program.hpp @@ -17,7 +17,7 @@ #include "context.hpp" -struct ur_program_handle_t_ { +struct ur_program_handle_t_ : ur_object_t_ { using native_type = CUmodule; native_type Module; const char *Binary; @@ -47,9 +47,10 @@ struct ur_program_handle_t_ { ur_program_build_status_t BuildStatus = UR_PROGRAM_BUILD_STATUS_NONE; ur_program_handle_t_(ur_context_handle_t Context, ur_device_handle_t Device) - : Module{nullptr}, Binary{}, BinarySizeInBytes{0}, RefCount{1}, - Context{Context}, Device{Device}, KernelReqdWorkGroupSizeMD{}, - KernelMaxWorkGroupSizeMD{}, KernelMaxLinearWorkGroupSizeMD{} { + : ur_object_t_(), Module{nullptr}, Binary{}, BinarySizeInBytes{0}, + RefCount{1}, Context{Context}, Device{Device}, + KernelReqdWorkGroupSizeMD{}, KernelMaxWorkGroupSizeMD{}, + KernelMaxLinearWorkGroupSizeMD{} { urContextRetain(Context); urDeviceRetain(Device); } diff --git a/source/adapters/cuda/queue.hpp b/source/adapters/cuda/queue.hpp index 89132c99ca..de0cd53275 100644 --- a/source/adapters/cuda/queue.hpp +++ b/source/adapters/cuda/queue.hpp @@ -21,7 +21,7 @@ using ur_stream_guard_ = std::unique_lock; /// UR queue mapping on to CUstream objects. 
/// -struct ur_queue_handle_t_ { +struct ur_queue_handle_t_ : ur_object_t_ { using native_type = CUstream; static constexpr int DefaultNumComputeStreams = 128; @@ -70,7 +70,7 @@ struct ur_queue_handle_t_ { ur_context_handle_t_ *Context, ur_device_handle_t_ *Device, unsigned int Flags, ur_queue_flags_t URFlags, int Priority, bool BackendOwns = true) - : ComputeStreams{std::move(ComputeStreams)}, + : ur_object_t_(), ComputeStreams{std::move(ComputeStreams)}, TransferStreams{std::move(TransferStreams)}, DelayCompute(this->ComputeStreams.size(), false), ComputeAppliedBarrier(this->ComputeStreams.size()), diff --git a/source/adapters/cuda/sampler.hpp b/source/adapters/cuda/sampler.hpp index 4823541c73..cabbacb5e6 100644 --- a/source/adapters/cuda/sampler.hpp +++ b/source/adapters/cuda/sampler.hpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// +#include "common.hpp" #include /// Implementation of samplers for CUDA @@ -23,7 +24,7 @@ /// | 4 3 2 | addressing mode 1 /// | 1 | filter mode /// | 0 | normalize coords -struct ur_sampler_handle_t_ { +struct ur_sampler_handle_t_ : ur_object_t_ { std::atomic_uint32_t RefCount; uint32_t Props; float MinMipmapLevelClamp; @@ -32,7 +33,7 @@ struct ur_sampler_handle_t_ { ur_context_handle_t Context; ur_sampler_handle_t_(ur_context_handle_t Context) - : RefCount(1), Props(0), MinMipmapLevelClamp(0.0f), + : ur_object_t_(), RefCount(1), Props(0), MinMipmapLevelClamp(0.0f), MaxMipmapLevelClamp(0.0f), MaxAnisotropy(0.0f), Context(Context) {} uint32_t incrementReferenceCount() noexcept { return ++RefCount; } diff --git a/source/adapters/cuda/ur_interface_loader.cpp b/source/adapters/cuda/ur_interface_loader.cpp index d3fea1aee2..2a75864436 100644 --- a/source/adapters/cuda/ur_interface_loader.cpp +++ b/source/adapters/cuda/ur_interface_loader.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// +#include "common.hpp" #include #include @@ 
-465,3 +466,36 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( #if defined(__cplusplus) } // extern "C" #endif + +ur_dditable_t *cuda_ddi_getter::value() { + static std::once_flag flag; + static ur_dditable_t table; + + std::call_once(flag, []() { + urGetGlobalProcAddrTable(UR_API_VERSION_CURRENT, &table.Global); + urGetBindlessImagesExpProcAddrTable(UR_API_VERSION_CURRENT, + &table.BindlessImagesExp); + urGetCommandBufferExpProcAddrTable(UR_API_VERSION_CURRENT, + &table.CommandBufferExp); + urGetContextProcAddrTable(UR_API_VERSION_CURRENT, &table.Context); + urGetEnqueueProcAddrTable(UR_API_VERSION_CURRENT, &table.Enqueue); + urGetEnqueueExpProcAddrTable(UR_API_VERSION_CURRENT, &table.EnqueueExp); + urGetEventProcAddrTable(UR_API_VERSION_CURRENT, &table.Event); + urGetKernelProcAddrTable(UR_API_VERSION_CURRENT, &table.Kernel); + urGetKernelExpProcAddrTable(UR_API_VERSION_CURRENT, &table.KernelExp); + urGetMemProcAddrTable(UR_API_VERSION_CURRENT, &table.Mem); + urGetPhysicalMemProcAddrTable(UR_API_VERSION_CURRENT, &table.PhysicalMem); + urGetPlatformProcAddrTable(UR_API_VERSION_CURRENT, &table.Platform); + urGetProgramProcAddrTable(UR_API_VERSION_CURRENT, &table.Program); + urGetProgramExpProcAddrTable(UR_API_VERSION_CURRENT, &table.ProgramExp); + urGetQueueProcAddrTable(UR_API_VERSION_CURRENT, &table.Queue); + urGetSamplerProcAddrTable(UR_API_VERSION_CURRENT, &table.Sampler); + urGetTensorMapExpProcAddrTable(UR_API_VERSION_CURRENT, &table.TensorMapExp); + urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &table.USM); + urGetUSMExpProcAddrTable(UR_API_VERSION_CURRENT, &table.USMExp); + urGetUsmP2PExpProcAddrTable(UR_API_VERSION_CURRENT, &table.UsmP2PExp); + urGetVirtualMemProcAddrTable(UR_API_VERSION_CURRENT, &table.VirtualMem); + urGetDeviceProcAddrTable(UR_API_VERSION_CURRENT, &table.Device); + }); + return &table; +} diff --git a/source/adapters/cuda/usm.cpp b/source/adapters/cuda/usm.cpp index 7f48309ca3..3856f1ac2b 100644 --- 
a/source/adapters/cuda/usm.cpp +++ b/source/adapters/cuda/usm.cpp @@ -378,7 +378,7 @@ ur_result_t USMHostMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, ur_usm_pool_desc_t *PoolDesc) - : Context{Context} { + : ur_object_t_(), Context{Context} { const void *pNext = PoolDesc->pNext; while (pNext != nullptr) { const ur_base_desc_t *BaseDesc = static_cast(pNext); diff --git a/source/adapters/cuda/usm.hpp b/source/adapters/cuda/usm.hpp index 7c6a2ea666..a947bb4ccc 100644 --- a/source/adapters/cuda/usm.hpp +++ b/source/adapters/cuda/usm.hpp @@ -15,7 +15,7 @@ usm::DisjointPoolAllConfigs InitializeDisjointPoolConfig(); -struct ur_usm_pool_handle_t_ { +struct ur_usm_pool_handle_t_ : ur_object_t_ { std::atomic_uint32_t RefCount = 1; ur_context_handle_t Context = nullptr; diff --git a/source/adapters/hip/adapter.cpp b/source/adapters/hip/adapter.cpp index 9daaee8a29..cee175aff4 100644 --- a/source/adapters/hip/adapter.cpp +++ b/source/adapters/hip/adapter.cpp @@ -15,7 +15,7 @@ #include #include -struct ur_adapter_handle_t_ { +struct ur_adapter_handle_t_ : ur_object_t_ { std::atomic RefCount = 0; logger::Logger &logger; ur_adapter_handle_t_(); @@ -40,8 +40,8 @@ class ur_legacy_sink : public logger::Sink { // through UR entry points. 
// https://github.com/oneapi-src/unified-runtime/issues/1330 ur_adapter_handle_t_::ur_adapter_handle_t_() - : logger( - logger::get_logger("hip", /*default_log_level*/ logger::Level::ERR)) { + : ur_object_t_(), logger(logger::get_logger( + "hip", /*default_log_level*/ logger::Level::ERR)) { if (std::getenv("UR_LOG_HIP") != nullptr) return; diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 887eb75287..feb73ed97a 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -48,9 +48,9 @@ commandHandleReleaseInternal(ur_exp_command_buffer_command_handle_t Command) { ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( ur_context_handle_t hContext, ur_device_handle_t hDevice, bool IsUpdatable) - : Context(hContext), Device(hDevice), IsUpdatable(IsUpdatable), - HIPGraph{nullptr}, HIPGraphExec{nullptr}, RefCountInternal{1}, - RefCountExternal{1}, NextSyncPoint{0} { + : ur_object_t_(), Context(hContext), Device(hDevice), + IsUpdatable(IsUpdatable), HIPGraph{nullptr}, HIPGraphExec{nullptr}, + RefCountInternal{1}, RefCountExternal{1}, NextSyncPoint{0} { urContextRetain(hContext); urDeviceRetain(hDevice); } @@ -80,8 +80,9 @@ ur_exp_command_buffer_command_handle_t_:: const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr, const size_t *LocalWorkSizePtr, uint32_t NumKernelAlternatives, ur_kernel_handle_t *KernelAlternatives) - : CommandBuffer(CommandBuffer), Kernel(Kernel), Node(Node), Params(Params), - WorkDim(WorkDim), RefCountInternal(1), RefCountExternal(1) { + : ur_object_t_(), CommandBuffer(CommandBuffer), Kernel(Kernel), Node(Node), + Params(Params), WorkDim(WorkDim), RefCountInternal(1), + RefCountExternal(1) { CommandBuffer->incrementInternalReferenceCount(); const size_t CopySize = sizeof(size_t) * WorkDim; diff --git a/source/adapters/hip/command_buffer.hpp b/source/adapters/hip/command_buffer.hpp index a236a32c24..3c02d9ee27 100644 --- 
a/source/adapters/hip/command_buffer.hpp +++ b/source/adapters/hip/command_buffer.hpp @@ -39,7 +39,7 @@ // Struct that stores all the information related to a kernel command in a // command-buffer, such that the command can be recreated. When handles can // be returned from other command types this struct will need refactored. -struct ur_exp_command_buffer_command_handle_t_ { +struct ur_exp_command_buffer_command_handle_t_ : ur_object_t_ { ur_exp_command_buffer_command_handle_t_( ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, hipGraphNode_t Node, hipKernelNodeParams Params, uint32_t WorkDim, @@ -121,7 +121,7 @@ struct ur_exp_command_buffer_command_handle_t_ { std::atomic_uint32_t RefCountExternal; }; -struct ur_exp_command_buffer_handle_t_ { +struct ur_exp_command_buffer_handle_t_ : ur_object_t_ { ur_exp_command_buffer_handle_t_(ur_context_handle_t hContext, ur_device_handle_t hDevice, bool IsUpdatable); diff --git a/source/adapters/hip/common.hpp b/source/adapters/hip/common.hpp index 98799d58f5..cbbb91bf79 100644 --- a/source/adapters/hip/common.hpp +++ b/source/adapters/hip/common.hpp @@ -239,3 +239,8 @@ inline static unsigned getMemoryType(hipPointerAttribute_t hipPointerAttrs) { return hipPointerAttrs.memoryType; #endif } + +struct hip_ddi_getter { + static ur_dditable_t *value(); +}; +using ur_object_t_ = ur_handle_base_t_; diff --git a/source/adapters/hip/context.hpp b/source/adapters/hip/context.hpp index 5af95753b8..e6486db8c5 100644 --- a/source/adapters/hip/context.hpp +++ b/source/adapters/hip/context.hpp @@ -76,7 +76,7 @@ typedef void (*ur_context_extended_deleter_t)(void *UserData); /// between native allocations for devices in the same \c ur_context_handle_t_ /// if necessary. 
/// -struct ur_context_handle_t_ { +struct ur_context_handle_t_ : ur_object_t_ { struct deleter_data { ur_context_extended_deleter_t Function; @@ -90,7 +90,7 @@ struct ur_context_handle_t_ { std::atomic_uint32_t RefCount; ur_context_handle_t_(const ur_device_handle_t *Devs, uint32_t NumDevices) - : Devices{Devs, Devs + NumDevices}, RefCount{1} { + : ur_object_t_(), Devices{Devs, Devs + NumDevices}, RefCount{1} { for (auto &Dev : Devices) { urDeviceRetain(Dev); } diff --git a/source/adapters/hip/device.hpp b/source/adapters/hip/device.hpp index bd2b6002e0..d9bf5c7338 100644 --- a/source/adapters/hip/device.hpp +++ b/source/adapters/hip/device.hpp @@ -17,7 +17,7 @@ /// Includes an observer pointer to the platform, /// and implements the reference counting semantics since /// HIP objects are not refcounted. -struct ur_device_handle_t_ { +struct ur_device_handle_t_ : ur_object_t_ { private: using native_type = hipDevice_t; @@ -38,8 +38,8 @@ struct ur_device_handle_t_ { public: ur_device_handle_t_(native_type HipDevice, hipEvent_t EvBase, ur_platform_handle_t Platform, uint32_t DeviceIndex) - : HIPDevice(HipDevice), RefCount{1}, Platform(Platform), EvBase(EvBase), - DeviceIndex(DeviceIndex) { + : ur_object_t_(), HIPDevice(HipDevice), RefCount{1}, Platform(Platform), + EvBase(EvBase), DeviceIndex(DeviceIndex) { UR_CHECK_ERROR(hipDeviceGetAttribute( &MaxWorkGroupSize, hipDeviceAttributeMaxThreadsPerBlock, HIPDevice)); diff --git a/source/adapters/hip/event.cpp b/source/adapters/hip/event.cpp index 81c839cf32..3eaa2afd3c 100644 --- a/source/adapters/hip/event.cpp +++ b/source/adapters/hip/event.cpp @@ -19,7 +19,7 @@ ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type, hipEvent_t EvEnd, hipEvent_t EvQueued, hipEvent_t EvStart, hipStream_t Stream, uint32_t StreamToken) - : CommandType{Type}, RefCount{1}, HasOwnership{true}, + : ur_object_t_(), CommandType{Type}, RefCount{1}, HasOwnership{true}, HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false}, 
StreamToken{StreamToken}, EventId{0}, EvEnd{EvEnd}, EvStart{EvStart}, EvQueued{EvQueued}, Queue{Queue}, Stream{Stream}, Context{Context} { @@ -29,11 +29,12 @@ ur_event_handle_t_::ur_event_handle_t_(ur_command_t Type, ur_event_handle_t_::ur_event_handle_t_(ur_context_handle_t Context, hipEvent_t EventNative) - : CommandType{UR_COMMAND_EVENTS_WAIT}, RefCount{1}, HasOwnership{false}, - HasBeenWaitedOn{false}, IsRecorded{false}, IsStarted{false}, - IsInterop{true}, StreamToken{std::numeric_limits::max()}, - EventId{0}, EvEnd{EventNative}, EvStart{nullptr}, EvQueued{nullptr}, - Queue{nullptr}, Stream{nullptr}, Context{Context} { + : ur_object_t_(), CommandType{UR_COMMAND_EVENTS_WAIT}, RefCount{1}, + HasOwnership{false}, HasBeenWaitedOn{false}, IsRecorded{false}, + IsStarted{false}, IsInterop{true}, + StreamToken{std::numeric_limits::max()}, EventId{0}, + EvEnd{EventNative}, EvStart{nullptr}, EvQueued{nullptr}, Queue{nullptr}, + Stream{nullptr}, Context{Context} { urContextRetain(Context); } diff --git a/source/adapters/hip/event.hpp b/source/adapters/hip/event.hpp index ab9544a552..aae89a2e33 100644 --- a/source/adapters/hip/event.hpp +++ b/source/adapters/hip/event.hpp @@ -14,7 +14,7 @@ /// UR Event mapping to hipEvent_t /// -struct ur_event_handle_t_ { +struct ur_event_handle_t_ : ur_object_t_ { public: using native_type = hipEvent_t; diff --git a/source/adapters/hip/kernel.hpp b/source/adapters/hip/kernel.hpp index 5ec51e7fa4..70e60f4973 100644 --- a/source/adapters/hip/kernel.hpp +++ b/source/adapters/hip/kernel.hpp @@ -32,7 +32,7 @@ /// A compiler pass converts the UR API local memory model into the /// HIP shared model. This object simply calculates the total of /// shared memory, and the initial offsets of each parameter. 
-struct ur_kernel_handle_t_ { +struct ur_kernel_handle_t_ : ur_object_t_ { using native_type = hipFunction_t; native_type Function; @@ -232,8 +232,9 @@ struct ur_kernel_handle_t_ { ur_kernel_handle_t_(hipFunction_t Func, hipFunction_t FuncWithOffsetParam, const char *Name, ur_program_handle_t Program, ur_context_handle_t Ctxt) - : Function{Func}, FunctionWithOffsetParam{FuncWithOffsetParam}, - Name{Name}, Context{Ctxt}, Program{Program}, RefCount{1} { + : ur_object_t_(), Function{Func}, + FunctionWithOffsetParam{FuncWithOffsetParam}, Name{Name}, Context{Ctxt}, + Program{Program}, RefCount{1} { assert(Program->getDevice()); UR_CHECK_ERROR(urKernelGetGroupInfo( this, Program->getDevice(), diff --git a/source/adapters/hip/memory.hpp b/source/adapters/hip/memory.hpp index 5b20706b45..a8dd4f0487 100644 --- a/source/adapters/hip/memory.hpp +++ b/source/adapters/hip/memory.hpp @@ -307,7 +307,7 @@ struct SurfaceMem { /// Migrations will occur in both cases if the most recent version of data /// is on a different device, marked by LastQueueWritingToMemObj->getDevice(). 
/// -struct ur_mem_handle_t_ { +struct ur_mem_handle_t_ : ur_object_t_ { // TODO: Move as much shared data up as possible using ur_context = ur_context_handle_t_ *; @@ -355,9 +355,9 @@ struct ur_mem_handle_t_ { // Subbuffer constructor ur_mem_handle_t_(ur_mem Parent, size_t SubBufferOffset) - : Context{Parent->Context}, RefCount{1}, MemFlags{Parent->MemFlags}, - HaveMigratedToDeviceSinceLastWrite(Parent->Context->Devices.size(), - false), + : ur_object_t_(), Context{Parent->Context}, RefCount{1}, + MemFlags{Parent->MemFlags}, HaveMigratedToDeviceSinceLastWrite( + Parent->Context->Devices.size(), false), Mem{BufferMem{std::get(Parent->Mem)}} { auto &SubBuffer = std::get(Mem); SubBuffer.Parent = Parent; diff --git a/source/adapters/hip/physical_mem.hpp b/source/adapters/hip/physical_mem.hpp index fc50836f62..12d4068516 100644 --- a/source/adapters/hip/physical_mem.hpp +++ b/source/adapters/hip/physical_mem.hpp @@ -17,10 +17,10 @@ /// management. /// TODO: Implement. /// -struct ur_physical_mem_handle_t_ { +struct ur_physical_mem_handle_t_ : ur_object_t_ { std::atomic_uint32_t RefCount; - ur_physical_mem_handle_t_() : RefCount(1) {} + ur_physical_mem_handle_t_() : ur_object_t_(), RefCount(1) {} uint32_t incrementReferenceCount() noexcept { return ++RefCount; } diff --git a/source/adapters/hip/platform.hpp b/source/adapters/hip/platform.hpp index 7b96de6473..565ea5532c 100644 --- a/source/adapters/hip/platform.hpp +++ b/source/adapters/hip/platform.hpp @@ -19,6 +19,6 @@ /// available devices since initialization is done /// when devices are used. 
/// -struct ur_platform_handle_t_ { +struct ur_platform_handle_t_ : ur_object_t_ { std::vector> Devices; }; diff --git a/source/adapters/hip/program.hpp b/source/adapters/hip/program.hpp index 25a70e6a7a..6b021ab0bd 100644 --- a/source/adapters/hip/program.hpp +++ b/source/adapters/hip/program.hpp @@ -17,7 +17,7 @@ #include "context.hpp" /// Implementation of UR Program on HIP Module object -struct ur_program_handle_t_ { +struct ur_program_handle_t_ : ur_object_t_ { using native_type = hipModule_t; native_type Module; const char *Binary; @@ -47,8 +47,9 @@ struct ur_program_handle_t_ { ur_program_build_status_t BuildStatus = UR_PROGRAM_BUILD_STATUS_NONE; ur_program_handle_t_(ur_context_handle_t Ctxt, ur_device_handle_t Device) - : Module{nullptr}, Binary{}, BinarySizeInBytes{0}, RefCount{1}, - Context{Ctxt}, Device{Device}, KernelReqdWorkGroupSizeMD{} { + : ur_object_t_(), Module{nullptr}, Binary{}, BinarySizeInBytes{0}, + RefCount{1}, Context{Ctxt}, Device{Device}, + KernelReqdWorkGroupSizeMD{} { urContextRetain(Context); urDeviceRetain(Device); } diff --git a/source/adapters/hip/queue.hpp b/source/adapters/hip/queue.hpp index ccd94c1419..15adc0a6c6 100644 --- a/source/adapters/hip/queue.hpp +++ b/source/adapters/hip/queue.hpp @@ -18,7 +18,7 @@ using ur_stream_guard = std::unique_lock; /// UR queue mapping on to hipStream_t objects. 
/// -struct ur_queue_handle_t_ { +struct ur_queue_handle_t_ : ur_object_t_ { using native_type = hipStream_t; static constexpr int DefaultNumComputeStreams = 64; static constexpr int DefaultNumTransferStreams = 16; @@ -66,7 +66,7 @@ struct ur_queue_handle_t_ { ur_context_handle_t Context, ur_device_handle_t Device, unsigned int Flags, ur_queue_flags_t URFlags, int Priority, bool BackendOwns = true) - : ComputeStreams{std::move(ComputeStreams)}, + : ur_object_t_(), ComputeStreams{std::move(ComputeStreams)}, TransferStreams{std::move(TransferStreams)}, DelayCompute(this->ComputeStreams.size(), false), ComputeAppliedBarrier(this->ComputeStreams.size()), diff --git a/source/adapters/hip/sampler.hpp b/source/adapters/hip/sampler.hpp index df2da74db7..d8dd5a7c28 100644 --- a/source/adapters/hip/sampler.hpp +++ b/source/adapters/hip/sampler.hpp @@ -17,13 +17,13 @@ /// Sampler property layout: /// | 31 30 ... 6 5 | 4 3 2 | 1 | 0 | /// | N/A | addressing mode | fiter mode | normalize coords | -struct ur_sampler_handle_t_ { +struct ur_sampler_handle_t_ : ur_object_t_ { std::atomic_uint32_t RefCount; uint32_t Props; ur_context_handle_t Context; ur_sampler_handle_t_(ur_context_handle_t Context) - : RefCount(1), Props(0), Context(Context) {} + : ur_object_t_(), RefCount(1), Props(0), Context(Context) {} uint32_t incrementReferenceCount() noexcept { return ++RefCount; } diff --git a/source/adapters/hip/ur_interface_loader.cpp b/source/adapters/hip/ur_interface_loader.cpp index 5e5e6ade84..549b3216d4 100644 --- a/source/adapters/hip/ur_interface_loader.cpp +++ b/source/adapters/hip/ur_interface_loader.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// +#include "common.hpp" #include #include @@ -431,3 +432,36 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( #if defined(__cplusplus) } // extern "C" #endif + +ur_dditable_t *hip_ddi_getter::value() { + static std::once_flag flag; + static ur_dditable_t table; + 
+ std::call_once(flag, []() { + urGetGlobalProcAddrTable(UR_API_VERSION_CURRENT, &table.Global); + urGetBindlessImagesExpProcAddrTable(UR_API_VERSION_CURRENT, + &table.BindlessImagesExp); + urGetCommandBufferExpProcAddrTable(UR_API_VERSION_CURRENT, + &table.CommandBufferExp); + urGetContextProcAddrTable(UR_API_VERSION_CURRENT, &table.Context); + urGetEnqueueProcAddrTable(UR_API_VERSION_CURRENT, &table.Enqueue); + urGetEnqueueExpProcAddrTable(UR_API_VERSION_CURRENT, &table.EnqueueExp); + urGetEventProcAddrTable(UR_API_VERSION_CURRENT, &table.Event); + urGetKernelProcAddrTable(UR_API_VERSION_CURRENT, &table.Kernel); + urGetKernelExpProcAddrTable(UR_API_VERSION_CURRENT, &table.KernelExp); + urGetMemProcAddrTable(UR_API_VERSION_CURRENT, &table.Mem); + urGetPhysicalMemProcAddrTable(UR_API_VERSION_CURRENT, &table.PhysicalMem); + urGetPlatformProcAddrTable(UR_API_VERSION_CURRENT, &table.Platform); + urGetProgramProcAddrTable(UR_API_VERSION_CURRENT, &table.Program); + urGetProgramExpProcAddrTable(UR_API_VERSION_CURRENT, &table.ProgramExp); + urGetQueueProcAddrTable(UR_API_VERSION_CURRENT, &table.Queue); + urGetSamplerProcAddrTable(UR_API_VERSION_CURRENT, &table.Sampler); + urGetTensorMapExpProcAddrTable(UR_API_VERSION_CURRENT, &table.TensorMapExp); + urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &table.USM); + urGetUSMExpProcAddrTable(UR_API_VERSION_CURRENT, &table.USMExp); + urGetUsmP2PExpProcAddrTable(UR_API_VERSION_CURRENT, &table.UsmP2PExp); + urGetVirtualMemProcAddrTable(UR_API_VERSION_CURRENT, &table.VirtualMem); + urGetDeviceProcAddrTable(UR_API_VERSION_CURRENT, &table.Device); + }); + return &table; +} diff --git a/source/adapters/hip/usm.cpp b/source/adapters/hip/usm.cpp index 922098e4a1..0eba5d9824 100644 --- a/source/adapters/hip/usm.cpp +++ b/source/adapters/hip/usm.cpp @@ -329,7 +329,7 @@ ur_result_t USMHostMemoryProvider::allocateImpl(void **ResultPtr, size_t Size, ur_usm_pool_handle_t_::ur_usm_pool_handle_t_(ur_context_handle_t Context, ur_usm_pool_desc_t 
*PoolDesc) - : Context(Context) { + : ur_object_t_(), Context(Context) { if (PoolDesc) { if (auto *Limits = find_stype_node(PoolDesc)) { for (auto &config : DisjointPoolConfigs.Configs) { diff --git a/source/adapters/hip/usm.hpp b/source/adapters/hip/usm.hpp index 2149ac26ba..969b625d14 100644 --- a/source/adapters/hip/usm.hpp +++ b/source/adapters/hip/usm.hpp @@ -15,7 +15,7 @@ usm::DisjointPoolAllConfigs InitializeDisjointPoolConfig(); -struct ur_usm_pool_handle_t_ { +struct ur_usm_pool_handle_t_ : ur_object_t_ { std::atomic_uint32_t RefCount = 1; ur_context_handle_t Context = nullptr; diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index 88b9458242..fad1fc914b 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -295,7 +295,7 @@ Behavior Summary: SysMan initialization is skipped. */ ur_adapter_handle_t_::ur_adapter_handle_t_() - : logger(logger::get_logger("level_zero")) { + : ur_handle_base_t_(), logger(logger::get_logger("level_zero")) { ZeInitDriversResult = ZE_RESULT_ERROR_UNINITIALIZED; ZeInitResult = ZE_RESULT_ERROR_UNINITIALIZED; ZesResult = ZE_RESULT_ERROR_UNINITIALIZED; diff --git a/source/adapters/level_zero/adapter.hpp b/source/adapters/level_zero/adapter.hpp index c41f671d9b..4901e24928 100644 --- a/source/adapters/level_zero/adapter.hpp +++ b/source/adapters/level_zero/adapter.hpp @@ -10,6 +10,7 @@ #pragma once #include "logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" #include #include #include @@ -24,7 +25,7 @@ using PlatformVec = std::vector>; class ur_legacy_sink; -struct ur_adapter_handle_t_ { +struct ur_adapter_handle_t_ : ur_handle_base_t_ { ur_adapter_handle_t_(); std::atomic RefCount = 0; std::mutex Mutex; diff --git a/source/adapters/level_zero/common.hpp b/source/adapters/level_zero/common.hpp index 590a9badea..d703bc0cb3 100644 --- a/source/adapters/level_zero/common.hpp +++ b/source/adapters/level_zero/common.hpp @@ -26,6 +26,7 @@ 
#include #include "logger/ur_logger.hpp" +#include "ur_interface_loader.hpp" struct _ur_platform_handle_t; @@ -410,7 +411,7 @@ struct ReferenceCounter { }; // Base class to store common data -struct _ur_object { +struct _ur_object : ur_handle_base_t_ { _ur_object() : RefCount{} {} // Must be atomic to prevent data race when incrementing/decrementing. diff --git a/source/adapters/level_zero/memory.hpp b/source/adapters/level_zero/memory.hpp index 88633b8bb6..937a75cd43 100644 --- a/source/adapters/level_zero/memory.hpp +++ b/source/adapters/level_zero/memory.hpp @@ -96,11 +96,11 @@ struct ur_mem_handle_t_ : _ur_object { protected: ur_mem_handle_t_(mem_type_t type, ur_context_handle_t Context) - : UrContext{Context}, UrDevice{nullptr}, mem_type(type) {} + : _ur_object(), UrContext{Context}, UrDevice{nullptr}, mem_type(type) {} ur_mem_handle_t_(mem_type_t type, ur_context_handle_t Context, ur_device_handle_t Device) - : UrContext{Context}, UrDevice(Device), mem_type(type) {} + : _ur_object(), UrContext{Context}, UrDevice(Device), mem_type(type) {} }; struct _ur_buffer final : ur_mem_handle_t_ { diff --git a/source/adapters/level_zero/platform.hpp b/source/adapters/level_zero/platform.hpp index 1381f51bca..7afa157ca1 100644 --- a/source/adapters/level_zero/platform.hpp +++ b/source/adapters/level_zero/platform.hpp @@ -25,9 +25,11 @@ struct ur_zes_device_handle_data_t { ze_bool_t SubDevice = false; }; -struct ur_platform_handle_t_ : public _ur_platform { +struct ur_platform_handle_t_ : ur_handle_base_t_, + public _ur_platform { ur_platform_handle_t_(ze_driver_handle_t Driver) - : ZeDriver{Driver}, ZeApiVersion{ZE_API_VERSION_CURRENT} {} + : ur_handle_base_t_(), ZeDriver{Driver}, + ZeApiVersion{ZE_API_VERSION_CURRENT} {} // Performs initialization of a newly constructed PI platform. 
ur_result_t initialize(); diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 739a3f3ee4..4007bd4307 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -8,6 +8,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +#include #include #include @@ -536,105 +537,125 @@ UR_APIEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( } // extern "C" #endif -#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO -namespace ur::level_zero { -ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) { +namespace { +ur_result_t populateDdiTable(ur_dditable_t *ddi) { if (ddi == nullptr) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } ur_result_t result; - result = ur::level_zero::urGetGlobalProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->Global); +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +#define NAMESPACE_ ::ur::level_zero +#else +#define NAMESPACE_ +#endif + + result = NAMESPACE_::urGetGlobalProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Global); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetBindlessImagesExpProcAddrTable( + result = NAMESPACE_::urGetBindlessImagesExpProcAddrTable( UR_API_VERSION_CURRENT, &ddi->BindlessImagesExp); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetCommandBufferExpProcAddrTable( + result = NAMESPACE_::urGetCommandBufferExpProcAddrTable( UR_API_VERSION_CURRENT, &ddi->CommandBufferExp); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetContextProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->Context); + result = NAMESPACE_::urGetContextProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Context); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetEnqueueProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->Enqueue); + result = 
NAMESPACE_::urGetEnqueueProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Enqueue); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetEnqueueExpProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->EnqueueExp); + result = NAMESPACE_::urGetEnqueueExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->EnqueueExp); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetEventProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->Event); + result = + NAMESPACE_::urGetEventProcAddrTable(UR_API_VERSION_CURRENT, &ddi->Event); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetKernelProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->Kernel); + result = NAMESPACE_::urGetKernelProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Kernel); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetKernelExpProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->KernelExp); + result = NAMESPACE_::urGetKernelExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->KernelExp); if (result != UR_RESULT_SUCCESS) return result; - result = - ur::level_zero::urGetMemProcAddrTable(UR_API_VERSION_CURRENT, &ddi->Mem); + result = NAMESPACE_::urGetMemProcAddrTable(UR_API_VERSION_CURRENT, &ddi->Mem); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetPhysicalMemProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->PhysicalMem); + result = NAMESPACE_::urGetPhysicalMemProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->PhysicalMem); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetPlatformProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->Platform); + result = NAMESPACE_::urGetPlatformProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Platform); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetProgramProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->Program); + result = NAMESPACE_::urGetProgramProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Program); if (result != UR_RESULT_SUCCESS) 
return result; - result = ur::level_zero::urGetProgramExpProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->ProgramExp); + result = NAMESPACE_::urGetProgramExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->ProgramExp); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetQueueProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->Queue); + result = + NAMESPACE_::urGetQueueProcAddrTable(UR_API_VERSION_CURRENT, &ddi->Queue); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetSamplerProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->Sampler); + result = NAMESPACE_::urGetSamplerProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Sampler); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetTensorMapExpProcAddrTable( - UR_API_VERSION_CURRENT, &ddi->TensorMapExp); + result = NAMESPACE_::urGetTensorMapExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->TensorMapExp); if (result != UR_RESULT_SUCCESS) return result; - result = - ur::level_zero::urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &ddi->USM); + result = NAMESPACE_::urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &ddi->USM); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetUSMExpProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->USMExp); + result = NAMESPACE_::urGetUSMExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->USMExp); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetUsmP2PExpProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->UsmP2PExp); + result = NAMESPACE_::urGetUsmP2PExpProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->UsmP2PExp); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetVirtualMemProcAddrTable(UR_API_VERSION_CURRENT, - &ddi->VirtualMem); + result = NAMESPACE_::urGetVirtualMemProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->VirtualMem); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetDeviceProcAddrTable(UR_API_VERSION_CURRENT, - 
&ddi->Device); + result = NAMESPACE_::urGetDeviceProcAddrTable(UR_API_VERSION_CURRENT, + &ddi->Device); if (result != UR_RESULT_SUCCESS) return result; +#undef NAMESPACE_ + return result; } -} // namespace ur::level_zero +} // namespace + +namespace ur::level_zero { +ur_dditable_t *ddi_getter::value() { + static std::once_flag flag; + static ur_dditable_t table; + + std::call_once(flag, []() { populateDdiTable(&table); }); + return &table; +} + +#ifdef UR_STATIC_ADAPTER_LEVEL_ZERO +ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) { + return populateDdiTable(ddi); +} #endif +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/ur_interface_loader.hpp b/source/adapters/level_zero/ur_interface_loader.hpp index 0814427837..13642acd0c 100644 --- a/source/adapters/level_zero/ur_interface_loader.hpp +++ b/source/adapters/level_zero/ur_interface_loader.hpp @@ -8,6 +8,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +#pragma once + #include #include @@ -768,4 +770,8 @@ urTensorMapEncodeTiledExp(ur_device_handle_t hDevice, #ifdef UR_STATIC_ADAPTER_LEVEL_ZERO ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi); #endif + +struct ddi_getter { + static ur_dditable_t *value(); +}; } // namespace ur::level_zero diff --git a/source/adapters/mock/ur_mock.cpp b/source/adapters/mock/ur_mock.cpp index 8af1cbd320..e64ee24417 100644 --- a/source/adapters/mock/ur_mock.cpp +++ b/source/adapters/mock/ur_mock.cpp @@ -81,6 +81,15 @@ ur_result_t mock_urDeviceGetInfo(void *pParams) { **params.ppPropSizeRet = sizeof(deviceName); } } break; + case UR_DEVICE_INFO_PLATFORM: + if (*params.ppPropValue != nullptr) { + *reinterpret_cast(*params.ppPropValue) = + driver::d_context.platform; + } + if (*params.ppPropSizeRet != nullptr) { + **params.ppPropSizeRet = sizeof(ur_platform_handle_t); + } + break; default: return UR_RESULT_SUCCESS; } @@ -89,6 +98,29 @@ ur_result_t 
mock_urDeviceGetInfo(void *pParams) { ////////////////////////////////////////////////////////////////////////// context_t::context_t() { + urGetGlobalProcAddrTable(version, &urDdiTable.Global); + urGetBindlessImagesExpProcAddrTable(version, &urDdiTable.BindlessImagesExp); + urGetCommandBufferExpProcAddrTable(version, &urDdiTable.CommandBufferExp); + urGetContextProcAddrTable(version, &urDdiTable.Context); + urGetEnqueueProcAddrTable(version, &urDdiTable.Enqueue); + urGetEnqueueExpProcAddrTable(version, &urDdiTable.EnqueueExp); + urGetEventProcAddrTable(version, &urDdiTable.Event); + urGetKernelProcAddrTable(version, &urDdiTable.Kernel); + urGetKernelExpProcAddrTable(version, &urDdiTable.KernelExp); + urGetMemProcAddrTable(version, &urDdiTable.Mem); + urGetPhysicalMemProcAddrTable(version, &urDdiTable.PhysicalMem); + urGetPlatformProcAddrTable(version, &urDdiTable.Platform); + urGetProgramProcAddrTable(version, &urDdiTable.Program); + urGetProgramExpProcAddrTable(version, &urDdiTable.ProgramExp); + urGetQueueProcAddrTable(version, &urDdiTable.Queue); + urGetSamplerProcAddrTable(version, &urDdiTable.Sampler); + urGetTensorMapExpProcAddrTable(version, &urDdiTable.TensorMapExp); + urGetUSMProcAddrTable(version, &urDdiTable.USM); + urGetUSMExpProcAddrTable(version, &urDdiTable.USMExp); + urGetUsmP2PExpProcAddrTable(version, &urDdiTable.UsmP2PExp); + urGetVirtualMemProcAddrTable(version, &urDdiTable.VirtualMem); + urGetDeviceProcAddrTable(version, &urDdiTable.Device); + mock::getCallbacks().set_replace_callback("urPlatformGetApiVersion", &mock_urPlatformGetApiVersion); // Set the default info stuff as before overrides, this way any application diff --git a/source/adapters/mock/ur_mock.hpp b/source/adapters/mock/ur_mock.hpp index 6249645fac..7aca43eb23 100644 --- a/source/adapters/mock/ur_mock.hpp +++ b/source/adapters/mock/ur_mock.hpp @@ -27,9 +27,17 @@ class __urdlllocal context_t { context_t(); ~context_t() = default; - ur_adapter_handle_t adapter = 
reinterpret_cast(1); - ur_device_handle_t device = reinterpret_cast(2); - ur_platform_handle_t platform = reinterpret_cast(3); + void *fake_adapter = &urDdiTable; + ur_adapter_handle_t adapter = + reinterpret_cast(&fake_adapter); + + void *fake_device = &urDdiTable; + ur_device_handle_t device = + reinterpret_cast(&fake_device); + + void *fake_platform = &urDdiTable; + ur_platform_handle_t platform = + reinterpret_cast(&fake_platform); }; extern context_t d_context; diff --git a/source/adapters/native_cpu/adapter.cpp b/source/adapters/native_cpu/adapter.cpp index 1af605286e..90f0c991d1 100644 --- a/source/adapters/native_cpu/adapter.cpp +++ b/source/adapters/native_cpu/adapter.cpp @@ -12,7 +12,7 @@ #include "common.hpp" #include "ur_api.h" -struct ur_adapter_handle_t_ { +struct ur_adapter_handle_t_ : native_cpu::object_base { std::atomic RefCount = 0; logger::Logger &logger = logger::get_logger("native_cpu"); } Adapter; diff --git a/source/adapters/native_cpu/common.hpp b/source/adapters/native_cpu/common.hpp index af0d11c5af..ea47af03a6 100644 --- a/source/adapters/native_cpu/common.hpp +++ b/source/adapters/native_cpu/common.hpp @@ -53,17 +53,24 @@ namespace ur { } // namespace ur } // namespace detail +namespace native_cpu { +struct native_cpu_ddi_getter { + static ur_dditable_t *value(); +}; +using object_base = ur_handle_base_t_; +} // namespace native_cpu + // Base class to store common data -struct _ur_object { +struct _ur_object : native_cpu::object_base { ur_shared_mutex Mutex; }; // Todo: replace this with a common helper once it is available -struct RefCounted { +struct RefCounted : native_cpu::object_base { std::atomic_uint32_t _refCount; uint32_t incrementReferenceCount() { return ++_refCount; } uint32_t decrementReferenceCount() { return --_refCount; } - RefCounted() : _refCount{1} {} + RefCounted() : native_cpu::object_base(), _refCount{1} {} uint32_t getReferenceCount() const { return _refCount; } }; diff --git 
a/source/adapters/native_cpu/device.hpp b/source/adapters/native_cpu/device.hpp index 2308c1a7f4..f286f02f7b 100644 --- a/source/adapters/native_cpu/device.hpp +++ b/source/adapters/native_cpu/device.hpp @@ -10,10 +10,11 @@ #pragma once +#include "common.hpp" #include "threadpool.hpp" #include -struct ur_device_handle_t_ { +struct ur_device_handle_t_ : native_cpu::object_base { native_cpu::threadpool_t tp; ur_device_handle_t_(ur_platform_handle_t ArgPlt); diff --git a/source/adapters/native_cpu/platform.hpp b/source/adapters/native_cpu/platform.hpp index 6791bba7aa..a685564760 100644 --- a/source/adapters/native_cpu/platform.hpp +++ b/source/adapters/native_cpu/platform.hpp @@ -15,6 +15,6 @@ #include "common.hpp" #include "device.hpp" -struct ur_platform_handle_t_ { +struct ur_platform_handle_t_ : native_cpu::object_base { ur_device_handle_t_ TheDevice{this}; }; diff --git a/source/adapters/native_cpu/ur_interface_loader.cpp b/source/adapters/native_cpu/ur_interface_loader.cpp index 6683ad8d8b..694d98342d 100644 --- a/source/adapters/native_cpu/ur_interface_loader.cpp +++ b/source/adapters/native_cpu/ur_interface_loader.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// +#include "common.hpp" #include #include @@ -447,3 +448,36 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( return UR_RESULT_SUCCESS; } } // extern "C" + +ur_dditable_t *native_cpu::native_cpu_ddi_getter::value() { + static std::once_flag flag; + static ur_dditable_t table; + + std::call_once(flag, []() { + urGetGlobalProcAddrTable(UR_API_VERSION_CURRENT, &table.Global); + urGetBindlessImagesExpProcAddrTable(UR_API_VERSION_CURRENT, + &table.BindlessImagesExp); + urGetCommandBufferExpProcAddrTable(UR_API_VERSION_CURRENT, + &table.CommandBufferExp); + urGetContextProcAddrTable(UR_API_VERSION_CURRENT, &table.Context); + urGetEnqueueProcAddrTable(UR_API_VERSION_CURRENT, &table.Enqueue); + 
urGetEnqueueExpProcAddrTable(UR_API_VERSION_CURRENT, &table.EnqueueExp); + urGetEventProcAddrTable(UR_API_VERSION_CURRENT, &table.Event); + urGetKernelProcAddrTable(UR_API_VERSION_CURRENT, &table.Kernel); + urGetKernelExpProcAddrTable(UR_API_VERSION_CURRENT, &table.KernelExp); + urGetMemProcAddrTable(UR_API_VERSION_CURRENT, &table.Mem); + urGetPhysicalMemProcAddrTable(UR_API_VERSION_CURRENT, &table.PhysicalMem); + urGetPlatformProcAddrTable(UR_API_VERSION_CURRENT, &table.Platform); + urGetProgramProcAddrTable(UR_API_VERSION_CURRENT, &table.Program); + urGetProgramExpProcAddrTable(UR_API_VERSION_CURRENT, &table.ProgramExp); + urGetQueueProcAddrTable(UR_API_VERSION_CURRENT, &table.Queue); + urGetSamplerProcAddrTable(UR_API_VERSION_CURRENT, &table.Sampler); + urGetTensorMapExpProcAddrTable(UR_API_VERSION_CURRENT, &table.TensorMapExp); + urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &table.USM); + urGetUSMExpProcAddrTable(UR_API_VERSION_CURRENT, &table.USMExp); + urGetUsmP2PExpProcAddrTable(UR_API_VERSION_CURRENT, &table.UsmP2PExp); + urGetVirtualMemProcAddrTable(UR_API_VERSION_CURRENT, &table.VirtualMem); + urGetDeviceProcAddrTable(UR_API_VERSION_CURRENT, &table.Device); + }); + return &table; +} diff --git a/source/common/ur_singleton.hpp b/source/common/ur_singleton.hpp deleted file mode 100644 index b95c12c5d6..0000000000 --- a/source/common/ur_singleton.hpp +++ /dev/null @@ -1,103 +0,0 @@ -/* - * - * Copyright (C) 2022-2023 Intel Corporation - * - * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM - * Exceptions. 
See LICENSE.TXT - * - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - */ - -#ifndef UR_SINGLETON_H -#define UR_SINGLETON_H 1 - -#include -#include -#include -#include - -////////////////////////////////////////////////////////////////////////// -/// a abstract factory for creation of singleton objects -template class singleton_factory_t { - struct entry_t { - std::unique_ptr ptr; - size_t ref_count; - }; - -protected: - using singleton_t = singleton_tn; - using key_t = typename std::conditional::value, - size_t, key_tn>::type; - - using ptr_t = std::unique_ptr; - using map_t = std::unordered_map; - - /// lock for thread-safety - std::mutex mut; - /// single instance of singleton for each unique key - map_t map; - ////////////////////////////////////////////////////////////////////////// - /// extract the key from parameter list and if necessary, convert type - template - key_t getKey(key_tn key, [[maybe_unused]] Ts &&...params) { - return reinterpret_cast(key); - } - -public: - ////////////////////////////////////////////////////////////////////////// - /// default ctor/dtor - singleton_factory_t() = default; - ~singleton_factory_t() = default; - - ////////////////////////////////////////////////////////////////////////// - /// gets a pointer to a unique instance of singleton - /// if no instance exists, then creates a new instance - /// the params are forwarded to the ctor of the singleton - /// the first parameter must be the unique identifier of the instance - template singleton_tn *getInstance(Ts &&...params) { - auto key = getKey(params...); - - if (key == 0) { // No zero keys allowed in map - return static_cast(0); - } - - std::lock_guard lk(mut); - auto iter = map.find(key); - - if (map.end() == iter) { - auto ptr = std::make_unique(std::forward(params)...); - iter = map.emplace(key, entry_t{std::move(ptr), 0}).first; - } else { - iter->second.ref_count++; - } - return iter->second.ptr.get(); - } - - void retain(key_tn key) { - 
std::lock_guard lk(mut); - auto iter = map.find(getKey(key)); - assert(iter != map.end()); - iter->second.ref_count++; - } - - ////////////////////////////////////////////////////////////////////////// - /// once the key is no longer valid, release the singleton - void release(key_tn key) { - std::lock_guard lk(mut); - auto iter = map.find(getKey(key)); - assert(iter != map.end()); - if (iter->second.ref_count == 0) { - map.erase(iter); - } else { - iter->second.ref_count--; - } - } - - void clear() { - std::lock_guard lk(mut); - map.clear(); - } -}; - -#endif /* UR_SINGLETON_H */ diff --git a/source/loader/CMakeLists.txt b/source/loader/CMakeLists.txt index 931c9dd3ed..8ded789688 100644 --- a/source/loader/CMakeLists.txt +++ b/source/loader/CMakeLists.txt @@ -104,11 +104,9 @@ endif() target_sources(ur_loader PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/ur_object.hpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_loader.hpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_loader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_ldrddi.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/ur_ldrddi.hpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_libapi.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_libddi.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_lib.hpp diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 81360c7126..253d517758 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -39,16 +39,8 @@ __urdlllocal ur_result_t UR_APICALL urAdapterGet( for (auto &platform : context->platforms) { if (platform.initStatus != UR_RESULT_SUCCESS) continue; - platform.dditable.ur.Global.pfnAdapterGet(1, &phAdapters[adapterIndex], - nullptr); - try { - phAdapters[adapterIndex] = reinterpret_cast( - context->factories.ur_adapter_factory.getInstance( - phAdapters[adapterIndex], &platform.dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - break; - } + platform.dditable.Global.pfnAdapterGet(1, &phAdapters[adapterIndex], + nullptr); adapterIndex++; if (adapterIndex == NumEntries) { break; @@ -72,21 
+64,15 @@ __urdlllocal ur_result_t UR_APICALL urAdapterRelease( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hAdapter)->dditable; - auto pfnAdapterRelease = dditable->ur.Global.pfnAdapterRelease; + auto *dditable = *reinterpret_cast(hAdapter); + + auto *pfnAdapterRelease = dditable->Global.pfnAdapterRelease; if (nullptr == pfnAdapterRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hAdapter = reinterpret_cast(hAdapter)->handle; - // forward to device-platform result = pfnAdapterRelease(hAdapter); - // release loader handle - context->factories.ur_adapter_factory.release(hAdapter); - return result; } @@ -99,21 +85,15 @@ __urdlllocal ur_result_t UR_APICALL urAdapterRetain( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hAdapter)->dditable; - auto pfnAdapterRetain = dditable->ur.Global.pfnAdapterRetain; + auto *dditable = *reinterpret_cast(hAdapter); + + auto *pfnAdapterRetain = dditable->Global.pfnAdapterRetain; if (nullptr == pfnAdapterRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hAdapter = reinterpret_cast(hAdapter)->handle; - // forward to device-platform result = pfnAdapterRetain(hAdapter); - // increment refcount of handle - context->factories.ur_adapter_factory.retain(hAdapter); - return result; } @@ -132,15 +112,12 @@ __urdlllocal ur_result_t UR_APICALL urAdapterGetLastError( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hAdapter)->dditable; - auto pfnAdapterGetLastError = dditable->ur.Global.pfnAdapterGetLastError; + auto *dditable = *reinterpret_cast(hAdapter); + + auto *pfnAdapterGetLastError = dditable->Global.pfnAdapterGetLastError; if (nullptr == pfnAdapterGetLastError) return UR_RESULT_ERROR_UNINITIALIZED; - // convert 
loader handle to platform handle - hAdapter = reinterpret_cast(hAdapter)->handle; - // forward to device-platform result = pfnAdapterGetLastError(hAdapter, ppMessage, pError); @@ -169,15 +146,12 @@ __urdlllocal ur_result_t UR_APICALL urAdapterGetInfo( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hAdapter)->dditable; - auto pfnAdapterGetInfo = dditable->ur.Global.pfnAdapterGetInfo; + auto *dditable = *reinterpret_cast(hAdapter); + + auto *pfnAdapterGetInfo = dditable->Global.pfnAdapterGetInfo; if (nullptr == pfnAdapterGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hAdapter = reinterpret_cast(hAdapter)->handle; - // forward to device-platform result = pfnAdapterGetInfo(hAdapter, propName, propSize, pPropValue, pPropSizeRet); @@ -211,18 +185,16 @@ __urdlllocal ur_result_t UR_APICALL urPlatformGet( for (uint32_t adapter_index = 0; adapter_index < NumAdapters; adapter_index++) { // extract adapter's function pointer table - auto dditable = - reinterpret_cast(phAdapters[adapter_index]) - ->dditable; + auto *dditable = + *reinterpret_cast(phAdapters[adapter_index]); if ((0 < NumEntries) && (NumEntries == total_platform_handle_count)) break; uint32_t library_platform_handle_count = 0; - result = - dditable->ur.Platform.pfnGet(&phAdapters[adapter_index], 1, 0, nullptr, - &library_platform_handle_count); + result = dditable->Platform.pfnGet(&phAdapters[adapter_index], 1, 0, + nullptr, &library_platform_handle_count); if (UR_RESULT_SUCCESS != result) break; @@ -232,22 +204,11 @@ __urdlllocal ur_result_t UR_APICALL urPlatformGet( library_platform_handle_count = NumEntries - total_platform_handle_count; } - result = dditable->ur.Platform.pfnGet( + result = dditable->Platform.pfnGet( &phAdapters[adapter_index], 1, library_platform_handle_count, &phPlatforms[total_platform_handle_count], nullptr); if (UR_RESULT_SUCCESS != result) break; - - try { - 
for (uint32_t i = 0; i < library_platform_handle_count; ++i) { - uint32_t platform_index = total_platform_handle_count + i; - phPlatforms[platform_index] = reinterpret_cast( - context->factories.ur_platform_factory.getInstance( - phPlatforms[platform_index], dditable)); - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } } total_platform_handle_count += library_platform_handle_count; @@ -281,15 +242,12 @@ __urdlllocal ur_result_t UR_APICALL urPlatformGetInfo( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hPlatform)->dditable; - auto pfnGetInfo = dditable->ur.Platform.pfnGetInfo; + auto *dditable = *reinterpret_cast(hPlatform); + + auto *pfnGetInfo = dditable->Platform.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hPlatform = reinterpret_cast(hPlatform)->handle; - // this value is needed for converting adapter handles to loader handles size_t sizeret = 0; if (pPropSizeRet == NULL) @@ -301,29 +259,6 @@ __urdlllocal ur_result_t UR_APICALL urPlatformGetInfo( if (UR_RESULT_SUCCESS != result) return result; - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_PLATFORM_INFO_ADAPTER: { - ur_adapter_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_adapter_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_adapter_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -338,15 +273,12 @@ __urdlllocal ur_result_t UR_APICALL urPlatformGetApiVersion( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = 
reinterpret_cast(hPlatform)->dditable; - auto pfnGetApiVersion = dditable->ur.Platform.pfnGetApiVersion; + auto *dditable = *reinterpret_cast(hPlatform); + + auto *pfnGetApiVersion = dditable->Platform.pfnGetApiVersion; if (nullptr == pfnGetApiVersion) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hPlatform = reinterpret_cast(hPlatform)->handle; - // forward to device-platform result = pfnGetApiVersion(hPlatform, pVersion); @@ -364,15 +296,12 @@ __urdlllocal ur_result_t UR_APICALL urPlatformGetNativeHandle( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hPlatform)->dditable; - auto pfnGetNativeHandle = dditable->ur.Platform.pfnGetNativeHandle; + auto *dditable = *reinterpret_cast(hPlatform); + + auto *pfnGetNativeHandle = dditable->Platform.pfnGetNativeHandle; if (nullptr == pfnGetNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hPlatform = reinterpret_cast(hPlatform)->handle; - // forward to device-platform result = pfnGetNativeHandle(hPlatform, phNativePlatform); @@ -397,16 +326,13 @@ __urdlllocal ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hAdapter)->dditable; - auto pfnCreateWithNativeHandle = - dditable->ur.Platform.pfnCreateWithNativeHandle; + auto *dditable = *reinterpret_cast(hAdapter); + + auto *pfnCreateWithNativeHandle = + dditable->Platform.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hAdapter = reinterpret_cast(hAdapter)->handle; - // forward to device-platform result = pfnCreateWithNativeHandle(hNativePlatform, hAdapter, pProperties, phPlatform); @@ -414,15 +340,6 @@ __urdlllocal ur_result_t UR_APICALL urPlatformCreateWithNativeHandle( 
if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phPlatform = reinterpret_cast( - context->factories.ur_platform_factory.getInstance(*phPlatform, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -440,15 +357,12 @@ __urdlllocal ur_result_t UR_APICALL urPlatformGetBackendOption( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hPlatform)->dditable; - auto pfnGetBackendOption = dditable->ur.Platform.pfnGetBackendOption; + auto *dditable = *reinterpret_cast(hPlatform); + + auto *pfnGetBackendOption = dditable->Platform.pfnGetBackendOption; if (nullptr == pfnGetBackendOption) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hPlatform = reinterpret_cast(hPlatform)->handle; - // forward to device-platform result = pfnGetBackendOption(hPlatform, pFrontendOption, ppPlatformOption); @@ -478,31 +392,18 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGet( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hPlatform)->dditable; - auto pfnGet = dditable->ur.Device.pfnGet; + auto *dditable = *reinterpret_cast(hPlatform); + + auto *pfnGet = dditable->Device.pfnGet; if (nullptr == pfnGet) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hPlatform = reinterpret_cast(hPlatform)->handle; - // forward to device-platform result = pfnGet(hPlatform, DeviceType, NumEntries, phDevices, pNumDevices); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handles to loader handles - for (size_t i = 0; (nullptr != phDevices) && (i < NumEntries); ++i) - phDevices[i] = reinterpret_cast( - context->factories.ur_device_factory.getInstance(phDevices[i], - dditable)); - } catch (std::bad_alloc &) { - result = 
UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -529,15 +430,12 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGetInfo( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hDevice)->dditable; - auto pfnGetInfo = dditable->ur.Device.pfnGetInfo; + auto *dditable = *reinterpret_cast(hDevice); + + auto *pfnGetInfo = dditable->Device.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // this value is needed for converting adapter handles to loader handles size_t sizeret = 0; if (pPropSizeRet == NULL) @@ -549,65 +447,6 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGetInfo( if (UR_RESULT_SUCCESS != result) return result; - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_DEVICE_INFO_PLATFORM: { - ur_platform_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_platform_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_platform_factory.getInstance(handles[i], - dditable)); - } - } - } break; - case UR_DEVICE_INFO_PARENT_DEVICE: { - ur_device_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_device_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_device_factory.getInstance(handles[i], - dditable)); - } - } - } break; - case UR_DEVICE_INFO_COMPONENT_DEVICES: { - ur_device_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_device_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_device_factory.getInstance(handles[i], - dditable)); - } - 
} - } break; - case UR_DEVICE_INFO_COMPOSITE_DEVICE: { - ur_device_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_device_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_device_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -620,21 +459,15 @@ __urdlllocal ur_result_t UR_APICALL urDeviceRetain( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hDevice)->dditable; - auto pfnRetain = dditable->ur.Device.pfnRetain; + auto *dditable = *reinterpret_cast(hDevice); + + auto *pfnRetain = dditable->Device.pfnRetain; if (nullptr == pfnRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnRetain(hDevice); - // increment refcount of handle - context->factories.ur_device_factory.retain(hDevice); - return result; } @@ -647,21 +480,15 @@ __urdlllocal ur_result_t UR_APICALL urDeviceRelease( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hDevice)->dditable; - auto pfnRelease = dditable->ur.Device.pfnRelease; + auto *dditable = *reinterpret_cast(hDevice); + + auto *pfnRelease = dditable->Device.pfnRelease; if (nullptr == pfnRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnRelease(hDevice); - // release loader handle - context->factories.ur_device_factory.release(hDevice); - return result; } @@ -685,15 +512,12 @@ __urdlllocal ur_result_t UR_APICALL urDevicePartition( 
[[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hDevice)->dditable; - auto pfnPartition = dditable->ur.Device.pfnPartition; + auto *dditable = *reinterpret_cast(hDevice); + + auto *pfnPartition = dditable->Device.pfnPartition; if (nullptr == pfnPartition) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnPartition(hDevice, pProperties, NumDevices, phSubDevices, pNumDevicesRet); @@ -701,16 +525,6 @@ __urdlllocal ur_result_t UR_APICALL urDevicePartition( if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handles to loader handles - for (size_t i = 0; (nullptr != phSubDevices) && (i < NumDevices); ++i) - phSubDevices[i] = reinterpret_cast( - context->factories.ur_device_factory.getInstance(phSubDevices[i], - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -733,15 +547,12 @@ __urdlllocal ur_result_t UR_APICALL urDeviceSelectBinary( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hDevice)->dditable; - auto pfnSelectBinary = dditable->ur.Device.pfnSelectBinary; + auto *dditable = *reinterpret_cast(hDevice); + + auto *pfnSelectBinary = dditable->Device.pfnSelectBinary; if (nullptr == pfnSelectBinary) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnSelectBinary(hDevice, pBinaries, NumBinaries, pSelectedBinary); @@ -759,15 +570,12 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGetNativeHandle( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hDevice)->dditable; - auto pfnGetNativeHandle = 
dditable->ur.Device.pfnGetNativeHandle; + auto *dditable = *reinterpret_cast(hDevice); + + auto *pfnGetNativeHandle = dditable->Device.pfnGetNativeHandle; if (nullptr == pfnGetNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnGetNativeHandle(hDevice, phNativeDevice); @@ -792,16 +600,12 @@ __urdlllocal ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hAdapter)->dditable; - auto pfnCreateWithNativeHandle = - dditable->ur.Device.pfnCreateWithNativeHandle; + auto *dditable = *reinterpret_cast(hAdapter); + + auto *pfnCreateWithNativeHandle = dditable->Device.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hAdapter = reinterpret_cast(hAdapter)->handle; - // forward to device-platform result = pfnCreateWithNativeHandle(hNativeDevice, hAdapter, pProperties, phDevice); @@ -809,14 +613,6 @@ __urdlllocal ur_result_t UR_APICALL urDeviceCreateWithNativeHandle( if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phDevice = reinterpret_cast( - context->factories.ur_device_factory.getInstance(*phDevice, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -835,15 +631,12 @@ __urdlllocal ur_result_t UR_APICALL urDeviceGetGlobalTimestamps( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hDevice)->dditable; - auto pfnGetGlobalTimestamps = dditable->ur.Device.pfnGetGlobalTimestamps; + auto *dditable = *reinterpret_cast(hDevice); + + auto *pfnGetGlobalTimestamps = dditable->Device.pfnGetGlobalTimestamps; if (nullptr == 
pfnGetGlobalTimestamps) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnGetGlobalTimestamps(hDevice, pDeviceTimestamp, pHostTimestamp); @@ -865,34 +658,18 @@ __urdlllocal ur_result_t UR_APICALL urContextCreate( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(*phDevices)->dditable; - auto pfnCreate = dditable->ur.Context.pfnCreate; + auto *dditable = *reinterpret_cast(phDevices[0]); + + auto *pfnCreate = dditable->Context.pfnCreate; if (nullptr == pfnCreate) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handles to platform handles - auto phDevicesLocal = std::vector(DeviceCount); - for (size_t i = 0; i < DeviceCount; ++i) - phDevicesLocal[i] = - reinterpret_cast(phDevices[i])->handle; - // forward to device-platform - result = - pfnCreate(DeviceCount, phDevicesLocal.data(), pProperties, phContext); + result = pfnCreate(DeviceCount, phDevices, pProperties, phContext); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phContext = reinterpret_cast( - context->factories.ur_context_factory.getInstance(*phContext, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -905,21 +682,15 @@ __urdlllocal ur_result_t UR_APICALL urContextRetain( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnRetain = dditable->ur.Context.pfnRetain; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnRetain = dditable->Context.pfnRetain; if (nullptr == pfnRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = 
pfnRetain(hContext); - // increment refcount of handle - context->factories.ur_context_factory.retain(hContext); - return result; } @@ -932,21 +703,15 @@ __urdlllocal ur_result_t UR_APICALL urContextRelease( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnRelease = dditable->ur.Context.pfnRelease; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnRelease = dditable->Context.pfnRelease; if (nullptr == pfnRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = pfnRelease(hContext); - // release loader handle - context->factories.ur_context_factory.release(hContext); - return result; } @@ -973,15 +738,12 @@ __urdlllocal ur_result_t UR_APICALL urContextGetInfo( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnGetInfo = dditable->ur.Context.pfnGetInfo; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnGetInfo = dditable->Context.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // this value is needed for converting adapter handles to loader handles size_t sizeret = 0; if (pPropSizeRet == NULL) @@ -993,29 +755,6 @@ __urdlllocal ur_result_t UR_APICALL urContextGetInfo( if (UR_RESULT_SUCCESS != result) return result; - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_CONTEXT_INFO_DEVICES: { - ur_device_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_device_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - 
context->factories.ur_device_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -1030,15 +769,12 @@ __urdlllocal ur_result_t UR_APICALL urContextGetNativeHandle( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnGetNativeHandle = dditable->ur.Context.pfnGetNativeHandle; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnGetNativeHandle = dditable->Context.pfnGetNativeHandle; if (nullptr == pfnGetNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = pfnGetNativeHandle(hContext, phNativeContext); @@ -1068,39 +804,19 @@ __urdlllocal ur_result_t UR_APICALL urContextCreateWithNativeHandle( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hAdapter)->dditable; - auto pfnCreateWithNativeHandle = - dditable->ur.Context.pfnCreateWithNativeHandle; + auto *dditable = *reinterpret_cast(hAdapter); + + auto *pfnCreateWithNativeHandle = dditable->Context.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hAdapter = reinterpret_cast(hAdapter)->handle; - - // convert loader handles to platform handles - auto phDevicesLocal = std::vector(numDevices); - for (size_t i = 0; i < numDevices; ++i) - phDevicesLocal[i] = - reinterpret_cast(phDevices[i])->handle; - // forward to device-platform - result = - pfnCreateWithNativeHandle(hNativeContext, hAdapter, numDevices, - phDevicesLocal.data(), pProperties, phContext); + result = pfnCreateWithNativeHandle(hNativeContext, hAdapter, numDevices, + phDevices, pProperties, 
phContext); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phContext = reinterpret_cast( - context->factories.ur_context_factory.getInstance(*phContext, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -1117,15 +833,12 @@ __urdlllocal ur_result_t UR_APICALL urContextSetExtendedDeleter( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnSetExtendedDeleter = dditable->ur.Context.pfnSetExtendedDeleter; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnSetExtendedDeleter = dditable->Context.pfnSetExtendedDeleter; if (nullptr == pfnSetExtendedDeleter) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = pfnSetExtendedDeleter(hContext, pfnDeleter, pUserData); @@ -1151,15 +864,12 @@ __urdlllocal ur_result_t UR_APICALL urMemImageCreate( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnImageCreate = dditable->ur.Mem.pfnImageCreate; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnImageCreate = dditable->Mem.pfnImageCreate; if (nullptr == pfnImageCreate) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = pfnImageCreate(hContext, flags, pImageFormat, pImageDesc, pHost, phMem); @@ -1167,14 +877,6 @@ __urdlllocal ur_result_t UR_APICALL urMemImageCreate( if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phMem = reinterpret_cast( - context->factories.ur_mem_factory.getInstance(*phMem, dditable)); - } catch (std::bad_alloc &) 
{ - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -1195,29 +897,18 @@ __urdlllocal ur_result_t UR_APICALL urMemBufferCreate( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnBufferCreate = dditable->ur.Mem.pfnBufferCreate; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnBufferCreate = dditable->Mem.pfnBufferCreate; if (nullptr == pfnBufferCreate) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = pfnBufferCreate(hContext, flags, size, pProperties, phBuffer); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phBuffer = reinterpret_cast( - context->factories.ur_mem_factory.getInstance(*phBuffer, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -1230,21 +921,15 @@ __urdlllocal ur_result_t UR_APICALL urMemRetain( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hMem)->dditable; - auto pfnRetain = dditable->ur.Mem.pfnRetain; + auto *dditable = *reinterpret_cast(hMem); + + auto *pfnRetain = dditable->Mem.pfnRetain; if (nullptr == pfnRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hMem = reinterpret_cast(hMem)->handle; - // forward to device-platform result = pfnRetain(hMem); - // increment refcount of handle - context->factories.ur_mem_factory.retain(hMem); - return result; } @@ -1257,21 +942,15 @@ __urdlllocal ur_result_t UR_APICALL urMemRelease( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hMem)->dditable; - auto pfnRelease = dditable->ur.Mem.pfnRelease; + auto *dditable = 
*reinterpret_cast(hMem); + + auto *pfnRelease = dditable->Mem.pfnRelease; if (nullptr == pfnRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hMem = reinterpret_cast(hMem)->handle; - // forward to device-platform result = pfnRelease(hMem); - // release loader handle - context->factories.ur_mem_factory.release(hMem); - return result; } @@ -1292,29 +971,18 @@ __urdlllocal ur_result_t UR_APICALL urMemBufferPartition( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hBuffer)->dditable; - auto pfnBufferPartition = dditable->ur.Mem.pfnBufferPartition; + auto *dditable = *reinterpret_cast(hBuffer); + + auto *pfnBufferPartition = dditable->Mem.pfnBufferPartition; if (nullptr == pfnBufferPartition) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - // forward to device-platform result = pfnBufferPartition(hBuffer, flags, bufferCreateType, pRegion, phMem); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phMem = reinterpret_cast( - context->factories.ur_mem_factory.getInstance(*phMem, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -1332,19 +1000,12 @@ __urdlllocal ur_result_t UR_APICALL urMemGetNativeHandle( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hMem)->dditable; - auto pfnGetNativeHandle = dditable->ur.Mem.pfnGetNativeHandle; + auto *dditable = *reinterpret_cast(hMem); + + auto *pfnGetNativeHandle = dditable->Mem.pfnGetNativeHandle; if (nullptr == pfnGetNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hMem = reinterpret_cast(hMem)->handle; - - // convert loader handle to platform handle - hDevice = (hDevice) ? 
reinterpret_cast(hDevice)->handle - : nullptr; - // forward to device-platform result = pfnGetNativeHandle(hMem, hDevice, phNativeMem); @@ -1369,16 +1030,13 @@ __urdlllocal ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnBufferCreateWithNativeHandle = - dditable->ur.Mem.pfnBufferCreateWithNativeHandle; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnBufferCreateWithNativeHandle = + dditable->Mem.pfnBufferCreateWithNativeHandle; if (nullptr == pfnBufferCreateWithNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = pfnBufferCreateWithNativeHandle(hNativeMem, hContext, pProperties, phMem); @@ -1386,14 +1044,6 @@ __urdlllocal ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phMem = reinterpret_cast( - context->factories.ur_mem_factory.getInstance(*phMem, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -1416,16 +1066,13 @@ __urdlllocal ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnImageCreateWithNativeHandle = - dditable->ur.Mem.pfnImageCreateWithNativeHandle; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnImageCreateWithNativeHandle = + dditable->Mem.pfnImageCreateWithNativeHandle; if (nullptr == pfnImageCreateWithNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = 
pfnImageCreateWithNativeHandle(hNativeMem, hContext, pImageFormat, pImageDesc, pProperties, phMem); @@ -1433,14 +1080,6 @@ __urdlllocal ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phMem = reinterpret_cast( - context->factories.ur_mem_factory.getInstance(*phMem, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -1466,15 +1105,12 @@ __urdlllocal ur_result_t UR_APICALL urMemGetInfo( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hMemory)->dditable; - auto pfnGetInfo = dditable->ur.Mem.pfnGetInfo; + auto *dditable = *reinterpret_cast(hMemory); + + auto *pfnGetInfo = dditable->Mem.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hMemory = reinterpret_cast(hMemory)->handle; - // this value is needed for converting adapter handles to loader handles size_t sizeret = 0; if (pPropSizeRet == NULL) @@ -1486,29 +1122,6 @@ __urdlllocal ur_result_t UR_APICALL urMemGetInfo( if (UR_RESULT_SUCCESS != result) return result; - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_MEM_INFO_CONTEXT: { - ur_context_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_context_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_context_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -1534,15 +1147,12 @@ __urdlllocal ur_result_t UR_APICALL urMemImageGetInfo( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto 
dditable = reinterpret_cast(hMemory)->dditable; - auto pfnImageGetInfo = dditable->ur.Mem.pfnImageGetInfo; + auto *dditable = *reinterpret_cast(hMemory); + + auto *pfnImageGetInfo = dditable->Mem.pfnImageGetInfo; if (nullptr == pfnImageGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hMemory = reinterpret_cast(hMemory)->handle; - // forward to device-platform result = pfnImageGetInfo(hMemory, propName, propSize, pPropValue, pPropSizeRet); @@ -1563,30 +1173,18 @@ __urdlllocal ur_result_t UR_APICALL urSamplerCreate( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreate = dditable->ur.Sampler.pfnCreate; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnCreate = dditable->Sampler.pfnCreate; if (nullptr == pfnCreate) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = pfnCreate(hContext, pDesc, phSampler); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phSampler = reinterpret_cast( - context->factories.ur_sampler_factory.getInstance(*phSampler, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -1599,21 +1197,15 @@ __urdlllocal ur_result_t UR_APICALL urSamplerRetain( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hSampler)->dditable; - auto pfnRetain = dditable->ur.Sampler.pfnRetain; + auto *dditable = *reinterpret_cast(hSampler); + + auto *pfnRetain = dditable->Sampler.pfnRetain; if (nullptr == pfnRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hSampler = reinterpret_cast(hSampler)->handle; - // forward to device-platform result = 
pfnRetain(hSampler); - // increment refcount of handle - context->factories.ur_sampler_factory.retain(hSampler); - return result; } @@ -1626,21 +1218,15 @@ __urdlllocal ur_result_t UR_APICALL urSamplerRelease( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hSampler)->dditable; - auto pfnRelease = dditable->ur.Sampler.pfnRelease; + auto *dditable = *reinterpret_cast(hSampler); + + auto *pfnRelease = dditable->Sampler.pfnRelease; if (nullptr == pfnRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hSampler = reinterpret_cast(hSampler)->handle; - // forward to device-platform result = pfnRelease(hSampler); - // release loader handle - context->factories.ur_sampler_factory.release(hSampler); - return result; } @@ -1662,15 +1248,12 @@ __urdlllocal ur_result_t UR_APICALL urSamplerGetInfo( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hSampler)->dditable; - auto pfnGetInfo = dditable->ur.Sampler.pfnGetInfo; + auto *dditable = *reinterpret_cast(hSampler); + + auto *pfnGetInfo = dditable->Sampler.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hSampler = reinterpret_cast(hSampler)->handle; - // this value is needed for converting adapter handles to loader handles size_t sizeret = 0; if (pPropSizeRet == NULL) @@ -1682,29 +1265,6 @@ __urdlllocal ur_result_t UR_APICALL urSamplerGetInfo( if (UR_RESULT_SUCCESS != result) return result; - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_SAMPLER_INFO_CONTEXT: { - ur_context_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_context_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - 
context->factories.ur_context_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -1719,15 +1279,12 @@ __urdlllocal ur_result_t UR_APICALL urSamplerGetNativeHandle( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hSampler)->dditable; - auto pfnGetNativeHandle = dditable->ur.Sampler.pfnGetNativeHandle; + auto *dditable = *reinterpret_cast(hSampler); + + auto *pfnGetNativeHandle = dditable->Sampler.pfnGetNativeHandle; if (nullptr == pfnGetNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hSampler = reinterpret_cast(hSampler)->handle; - // forward to device-platform result = pfnGetNativeHandle(hSampler, phNativeSampler); @@ -1752,16 +1309,12 @@ __urdlllocal ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreateWithNativeHandle = - dditable->ur.Sampler.pfnCreateWithNativeHandle; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnCreateWithNativeHandle = dditable->Sampler.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = pfnCreateWithNativeHandle(hNativeSampler, hContext, pProperties, phSampler); @@ -1769,15 +1322,6 @@ __urdlllocal ur_result_t UR_APICALL urSamplerCreateWithNativeHandle( if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phSampler = reinterpret_cast( - context->factories.ur_sampler_factory.getInstance(*phSampler, - dditable)); - } catch (std::bad_alloc &) { - 
result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -1798,19 +1342,12 @@ __urdlllocal ur_result_t UR_APICALL urUSMHostAlloc( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnHostAlloc = dditable->ur.USM.pfnHostAlloc; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnHostAlloc = dditable->USM.pfnHostAlloc; if (nullptr == pfnHostAlloc) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - pool = - (pool) ? reinterpret_cast(pool)->handle : nullptr; - // forward to device-platform result = pfnHostAlloc(hContext, pUSMDesc, pool, size, ppMem); @@ -1836,22 +1373,12 @@ __urdlllocal ur_result_t UR_APICALL urUSMDeviceAlloc( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnDeviceAlloc = dditable->ur.USM.pfnDeviceAlloc; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnDeviceAlloc = dditable->USM.pfnDeviceAlloc; if (nullptr == pfnDeviceAlloc) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - pool = - (pool) ? 
reinterpret_cast(pool)->handle : nullptr; - // forward to device-platform result = pfnDeviceAlloc(hContext, hDevice, pUSMDesc, pool, size, ppMem); @@ -1877,22 +1404,12 @@ __urdlllocal ur_result_t UR_APICALL urUSMSharedAlloc( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnSharedAlloc = dditable->ur.USM.pfnSharedAlloc; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnSharedAlloc = dditable->USM.pfnSharedAlloc; if (nullptr == pfnSharedAlloc) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - pool = - (pool) ? reinterpret_cast(pool)->handle : nullptr; - // forward to device-platform result = pfnSharedAlloc(hContext, hDevice, pUSMDesc, pool, size, ppMem); @@ -1910,15 +1427,12 @@ __urdlllocal ur_result_t UR_APICALL urUSMFree( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnFree = dditable->ur.USM.pfnFree; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnFree = dditable->USM.pfnFree; if (nullptr == pfnFree) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = pfnFree(hContext, pMem); @@ -1945,15 +1459,12 @@ __urdlllocal ur_result_t UR_APICALL urUSMGetMemAllocInfo( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnGetMemAllocInfo = dditable->ur.USM.pfnGetMemAllocInfo; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnGetMemAllocInfo = dditable->USM.pfnGetMemAllocInfo; if 
(nullptr == pfnGetMemAllocInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // this value is needed for converting adapter handles to loader handles size_t sizeret = 0; if (pPropSizeRet == NULL) @@ -1966,41 +1477,6 @@ __urdlllocal ur_result_t UR_APICALL urUSMGetMemAllocInfo( if (UR_RESULT_SUCCESS != result) return result; - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_USM_ALLOC_INFO_DEVICE: { - ur_device_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_device_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_device_factory.getInstance(handles[i], - dditable)); - } - } - } break; - case UR_USM_ALLOC_INFO_POOL: { - ur_usm_pool_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_usm_pool_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_usm_pool_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -2018,29 +1494,18 @@ __urdlllocal ur_result_t UR_APICALL urUSMPoolCreate( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnPoolCreate = dditable->ur.USM.pfnPoolCreate; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnPoolCreate = dditable->USM.pfnPoolCreate; if (nullptr == pfnPoolCreate) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = pfnPoolCreate(hContext, pPoolDesc, ppPool); if 
(UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *ppPool = reinterpret_cast( - context->factories.ur_usm_pool_factory.getInstance(*ppPool, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -2053,21 +1518,15 @@ __urdlllocal ur_result_t UR_APICALL urUSMPoolRetain( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(pPool)->dditable; - auto pfnPoolRetain = dditable->ur.USM.pfnPoolRetain; + auto *dditable = *reinterpret_cast(pPool); + + auto *pfnPoolRetain = dditable->USM.pfnPoolRetain; if (nullptr == pfnPoolRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - pPool = reinterpret_cast(pPool)->handle; - // forward to device-platform result = pfnPoolRetain(pPool); - // increment refcount of handle - context->factories.ur_usm_pool_factory.retain(pPool); - return result; } @@ -2080,21 +1539,15 @@ __urdlllocal ur_result_t UR_APICALL urUSMPoolRelease( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(pPool)->dditable; - auto pfnPoolRelease = dditable->ur.USM.pfnPoolRelease; + auto *dditable = *reinterpret_cast(pPool); + + auto *pfnPoolRelease = dditable->USM.pfnPoolRelease; if (nullptr == pfnPoolRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - pPool = reinterpret_cast(pPool)->handle; - // forward to device-platform result = pfnPoolRelease(pPool); - // release loader handle - context->factories.ur_usm_pool_factory.release(pPool); - return result; } @@ -2116,15 +1569,12 @@ __urdlllocal ur_result_t UR_APICALL urUSMPoolGetInfo( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hPool)->dditable; - auto pfnPoolGetInfo = dditable->ur.USM.pfnPoolGetInfo; 
+ auto *dditable = *reinterpret_cast(hPool); + + auto *pfnPoolGetInfo = dditable->USM.pfnPoolGetInfo; if (nullptr == pfnPoolGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hPool = reinterpret_cast(hPool)->handle; - // this value is needed for converting adapter handles to loader handles size_t sizeret = 0; if (pPropSizeRet == NULL) @@ -2136,29 +1586,6 @@ __urdlllocal ur_result_t UR_APICALL urUSMPoolGetInfo( if (UR_RESULT_SUCCESS != result) return result; - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_USM_POOL_INFO_CONTEXT: { - ur_context_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_context_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_context_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -2187,19 +1614,12 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemGranularityGetInfo( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnGranularityGetInfo = dditable->ur.VirtualMem.pfnGranularityGetInfo; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnGranularityGetInfo = dditable->VirtualMem.pfnGranularityGetInfo; if (nullptr == pfnGranularityGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = (hDevice) ? 
reinterpret_cast(hDevice)->handle - : nullptr; - // forward to device-platform result = pfnGranularityGetInfo(hContext, hDevice, propName, propSize, pPropValue, pPropSizeRet); @@ -2225,15 +1645,12 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemReserve( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnReserve = dditable->ur.VirtualMem.pfnReserve; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnReserve = dditable->VirtualMem.pfnReserve; if (nullptr == pfnReserve) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = pfnReserve(hContext, pStart, size, ppStart); @@ -2253,15 +1670,12 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemFree( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnFree = dditable->ur.VirtualMem.pfnFree; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnFree = dditable->VirtualMem.pfnFree; if (nullptr == pfnFree) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = pfnFree(hContext, pStart, size); @@ -2287,19 +1701,12 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemMap( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnMap = dditable->ur.VirtualMem.pfnMap; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnMap = dditable->VirtualMem.pfnMap; if (nullptr == pfnMap) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - 
hPhysicalMem = - reinterpret_cast(hPhysicalMem)->handle; - // forward to device-platform result = pfnMap(hContext, pStart, size, hPhysicalMem, offset, flags); @@ -2319,15 +1726,12 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemUnmap( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnUnmap = dditable->ur.VirtualMem.pfnUnmap; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnUnmap = dditable->VirtualMem.pfnUnmap; if (nullptr == pfnUnmap) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = pfnUnmap(hContext, pStart, size); @@ -2349,15 +1753,12 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemSetAccess( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnSetAccess = dditable->ur.VirtualMem.pfnSetAccess; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnSetAccess = dditable->VirtualMem.pfnSetAccess; if (nullptr == pfnSetAccess) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = pfnSetAccess(hContext, pStart, size, flags); @@ -2389,15 +1790,12 @@ __urdlllocal ur_result_t UR_APICALL urVirtualMemGetInfo( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnGetInfo = dditable->ur.VirtualMem.pfnGetInfo; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnGetInfo = dditable->VirtualMem.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to 
device-platform result = pfnGetInfo(hContext, pStart, size, propName, propSize, pPropValue, pPropSizeRet); @@ -2423,33 +1821,18 @@ __urdlllocal ur_result_t UR_APICALL urPhysicalMemCreate( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreate = dditable->ur.PhysicalMem.pfnCreate; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnCreate = dditable->PhysicalMem.pfnCreate; if (nullptr == pfnCreate) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnCreate(hContext, hDevice, size, pProperties, phPhysicalMem); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phPhysicalMem = reinterpret_cast( - context->factories.ur_physical_mem_factory.getInstance(*phPhysicalMem, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -2462,23 +1845,15 @@ __urdlllocal ur_result_t UR_APICALL urPhysicalMemRetain( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hPhysicalMem)->dditable; - auto pfnRetain = dditable->ur.PhysicalMem.pfnRetain; + auto *dditable = *reinterpret_cast(hPhysicalMem); + + auto *pfnRetain = dditable->PhysicalMem.pfnRetain; if (nullptr == pfnRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hPhysicalMem = - reinterpret_cast(hPhysicalMem)->handle; - // forward to device-platform result = pfnRetain(hPhysicalMem); - // increment refcount of handle - context->factories.ur_physical_mem_factory.retain(hPhysicalMem); - return result; } @@ -2491,23 +1866,15 @@ __urdlllocal ur_result_t UR_APICALL 
urPhysicalMemRelease( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hPhysicalMem)->dditable; - auto pfnRelease = dditable->ur.PhysicalMem.pfnRelease; + auto *dditable = *reinterpret_cast(hPhysicalMem); + + auto *pfnRelease = dditable->PhysicalMem.pfnRelease; if (nullptr == pfnRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hPhysicalMem = - reinterpret_cast(hPhysicalMem)->handle; - // forward to device-platform result = pfnRelease(hPhysicalMem); - // release loader handle - context->factories.ur_physical_mem_factory.release(hPhysicalMem); - return result; } @@ -2532,17 +1899,12 @@ __urdlllocal ur_result_t UR_APICALL urPhysicalMemGetInfo( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hPhysicalMem)->dditable; - auto pfnGetInfo = dditable->ur.PhysicalMem.pfnGetInfo; + auto *dditable = *reinterpret_cast(hPhysicalMem); + + auto *pfnGetInfo = dditable->PhysicalMem.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hPhysicalMem = - reinterpret_cast(hPhysicalMem)->handle; - // this value is needed for converting adapter handles to loader handles size_t sizeret = 0; if (pPropSizeRet == NULL) @@ -2555,41 +1917,6 @@ __urdlllocal ur_result_t UR_APICALL urPhysicalMemGetInfo( if (UR_RESULT_SUCCESS != result) return result; - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_PHYSICAL_MEM_INFO_CONTEXT: { - ur_context_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_context_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_context_factory.getInstance(handles[i], - dditable)); - } - } - } break; - case UR_PHYSICAL_MEM_INFO_DEVICE: { - 
ur_device_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_device_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_device_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -2610,30 +1937,18 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithIL( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreateWithIL = dditable->ur.Program.pfnCreateWithIL; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnCreateWithIL = dditable->Program.pfnCreateWithIL; if (nullptr == pfnCreateWithIL) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = pfnCreateWithIL(hContext, pIL, length, pProperties, phProgram); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phProgram = reinterpret_cast( - context->factories.ur_program_factory.getInstance(*phProgram, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -2661,37 +1976,19 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithBinary( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreateWithBinary = dditable->ur.Program.pfnCreateWithBinary; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnCreateWithBinary = dditable->Program.pfnCreateWithBinary; if (nullptr == pfnCreateWithBinary) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle 
- hContext = reinterpret_cast(hContext)->handle; - - // convert loader handles to platform handles - auto phDevicesLocal = std::vector(numDevices); - for (size_t i = 0; i < numDevices; ++i) - phDevicesLocal[i] = - reinterpret_cast(phDevices[i])->handle; - // forward to device-platform - result = pfnCreateWithBinary(hContext, numDevices, phDevicesLocal.data(), - pLengths, ppBinaries, pProperties, phProgram); + result = pfnCreateWithBinary(hContext, numDevices, phDevices, pLengths, + ppBinaries, pProperties, phProgram); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phProgram = reinterpret_cast( - context->factories.ur_program_factory.getInstance(*phProgram, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -2708,18 +2005,12 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuild( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnBuild = dditable->ur.Program.pfnBuild; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnBuild = dditable->Program.pfnBuild; if (nullptr == pfnBuild) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - // forward to device-platform result = pfnBuild(hContext, hProgram, pOptions); @@ -2739,18 +2030,12 @@ __urdlllocal ur_result_t UR_APICALL urProgramCompile( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCompile = dditable->ur.Program.pfnCompile; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnCompile = dditable->Program.pfnCompile; if (nullptr == pfnCompile) return UR_RESULT_ERROR_UNINITIALIZED; - // 
convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - // forward to device-platform result = pfnCompile(hContext, hProgram, pOptions); @@ -2777,34 +2062,17 @@ __urdlllocal ur_result_t UR_APICALL urProgramLink( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnLink = dditable->ur.Program.pfnLink; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnLink = dditable->Program.pfnLink; if (nullptr == pfnLink) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handles to platform handles - auto phProgramsLocal = std::vector(count); - for (size_t i = 0; i < count; ++i) - phProgramsLocal[i] = - reinterpret_cast(phPrograms[i])->handle; - // forward to device-platform - result = - pfnLink(hContext, count, phProgramsLocal.data(), pOptions, phProgram); - - try { - // convert platform handle to loader handle - if (nullptr != phProgram) - *phProgram = reinterpret_cast( - context->factories.ur_program_factory.getInstance(*phProgram, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } + result = pfnLink(hContext, count, phPrograms, pOptions, phProgram); + + if (UR_RESULT_SUCCESS != result) + return result; return result; } @@ -2818,21 +2086,15 @@ __urdlllocal ur_result_t UR_APICALL urProgramRetain( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hProgram)->dditable; - auto pfnRetain = dditable->ur.Program.pfnRetain; + auto *dditable = *reinterpret_cast(hProgram); + + auto *pfnRetain = dditable->Program.pfnRetain; if (nullptr == pfnRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to 
platform handle - hProgram = reinterpret_cast(hProgram)->handle; - // forward to device-platform result = pfnRetain(hProgram); - // increment refcount of handle - context->factories.ur_program_factory.retain(hProgram); - return result; } @@ -2845,21 +2107,15 @@ __urdlllocal ur_result_t UR_APICALL urProgramRelease( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hProgram)->dditable; - auto pfnRelease = dditable->ur.Program.pfnRelease; + auto *dditable = *reinterpret_cast(hProgram); + + auto *pfnRelease = dditable->Program.pfnRelease; if (nullptr == pfnRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - // forward to device-platform result = pfnRelease(hProgram); - // release loader handle - context->factories.ur_program_factory.release(hProgram); - return result; } @@ -2880,18 +2136,12 @@ __urdlllocal ur_result_t UR_APICALL urProgramGetFunctionPointer( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hDevice)->dditable; - auto pfnGetFunctionPointer = dditable->ur.Program.pfnGetFunctionPointer; + auto *dditable = *reinterpret_cast(hDevice); + + auto *pfnGetFunctionPointer = dditable->Program.pfnGetFunctionPointer; if (nullptr == pfnGetFunctionPointer) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - // forward to device-platform result = pfnGetFunctionPointer(hDevice, hProgram, pFunctionName, ppFunctionPointer); @@ -2918,19 +2168,13 @@ __urdlllocal ur_result_t UR_APICALL urProgramGetGlobalVariablePointer( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = 
reinterpret_cast(hDevice)->dditable; - auto pfnGetGlobalVariablePointer = - dditable->ur.Program.pfnGetGlobalVariablePointer; + auto *dditable = *reinterpret_cast(hDevice); + + auto *pfnGetGlobalVariablePointer = + dditable->Program.pfnGetGlobalVariablePointer; if (nullptr == pfnGetGlobalVariablePointer) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - // forward to device-platform result = pfnGetGlobalVariablePointer(hDevice, hProgram, pGlobalVariableName, pGlobalVariableSizeRet, @@ -2962,15 +2206,12 @@ __urdlllocal ur_result_t UR_APICALL urProgramGetInfo( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hProgram)->dditable; - auto pfnGetInfo = dditable->ur.Program.pfnGetInfo; + auto *dditable = *reinterpret_cast(hProgram); + + auto *pfnGetInfo = dditable->Program.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - // this value is needed for converting adapter handles to loader handles size_t sizeret = 0; if (pPropSizeRet == NULL) @@ -2982,41 +2223,6 @@ __urdlllocal ur_result_t UR_APICALL urProgramGetInfo( if (UR_RESULT_SUCCESS != result) return result; - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_PROGRAM_INFO_CONTEXT: { - ur_context_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_context_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_context_factory.getInstance(handles[i], - dditable)); - } - } - } break; - case UR_PROGRAM_INFO_DEVICES: { - ur_device_handle_t *handles = - reinterpret_cast(pPropValue); - size_t 
nelements = *pPropSizeRet / sizeof(ur_device_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_device_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -3044,18 +2250,12 @@ __urdlllocal ur_result_t UR_APICALL urProgramGetBuildInfo( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hProgram)->dditable; - auto pfnGetBuildInfo = dditable->ur.Program.pfnGetBuildInfo; + auto *dditable = *reinterpret_cast(hProgram); + + auto *pfnGetBuildInfo = dditable->Program.pfnGetBuildInfo; if (nullptr == pfnGetBuildInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnGetBuildInfo(hProgram, hDevice, propName, propSize, pPropValue, pPropSizeRet); @@ -3077,16 +2277,13 @@ __urdlllocal ur_result_t UR_APICALL urProgramSetSpecializationConstants( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hProgram)->dditable; - auto pfnSetSpecializationConstants = - dditable->ur.Program.pfnSetSpecializationConstants; + auto *dditable = *reinterpret_cast(hProgram); + + auto *pfnSetSpecializationConstants = + dditable->Program.pfnSetSpecializationConstants; if (nullptr == pfnSetSpecializationConstants) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - // forward to device-platform result = pfnSetSpecializationConstants(hProgram, count, pSpecConstants); @@ -3104,15 +2301,12 @@ __urdlllocal ur_result_t 
UR_APICALL urProgramGetNativeHandle( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hProgram)->dditable; - auto pfnGetNativeHandle = dditable->ur.Program.pfnGetNativeHandle; + auto *dditable = *reinterpret_cast(hProgram); + + auto *pfnGetNativeHandle = dditable->Program.pfnGetNativeHandle; if (nullptr == pfnGetNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - // forward to device-platform result = pfnGetNativeHandle(hProgram, phNativeProgram); @@ -3137,16 +2331,12 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithNativeHandle( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreateWithNativeHandle = - dditable->ur.Program.pfnCreateWithNativeHandle; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnCreateWithNativeHandle = dditable->Program.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = pfnCreateWithNativeHandle(hNativeProgram, hContext, pProperties, phProgram); @@ -3154,15 +2344,6 @@ __urdlllocal ur_result_t UR_APICALL urProgramCreateWithNativeHandle( if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phProgram = reinterpret_cast( - context->factories.ur_program_factory.getInstance(*phProgram, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -3179,29 +2360,18 @@ __urdlllocal ur_result_t UR_APICALL urKernelCreate( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = 
reinterpret_cast(hProgram)->dditable; - auto pfnCreate = dditable->ur.Kernel.pfnCreate; + auto *dditable = *reinterpret_cast(hProgram); + + auto *pfnCreate = dditable->Kernel.pfnCreate; if (nullptr == pfnCreate) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - // forward to device-platform result = pfnCreate(hProgram, pKernelName, phKernel); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phKernel = reinterpret_cast( - context->factories.ur_kernel_factory.getInstance(*phKernel, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -3224,15 +2394,12 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgValue( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnSetArgValue = dditable->ur.Kernel.pfnSetArgValue; + auto *dditable = *reinterpret_cast(hKernel); + + auto *pfnSetArgValue = dditable->Kernel.pfnSetArgValue; if (nullptr == pfnSetArgValue) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - // forward to device-platform result = pfnSetArgValue(hKernel, argIndex, argSize, pProperties, pArgValue); @@ -3254,15 +2421,12 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgLocal( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnSetArgLocal = dditable->ur.Kernel.pfnSetArgLocal; + auto *dditable = *reinterpret_cast(hKernel); + + auto *pfnSetArgLocal = dditable->Kernel.pfnSetArgLocal; if (nullptr == pfnSetArgLocal) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - // forward to device-platform 
result = pfnSetArgLocal(hKernel, argIndex, argSize, pProperties); @@ -3292,15 +2456,12 @@ __urdlllocal ur_result_t UR_APICALL urKernelGetInfo( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnGetInfo = dditable->ur.Kernel.pfnGetInfo; + auto *dditable = *reinterpret_cast(hKernel); + + auto *pfnGetInfo = dditable->Kernel.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - // this value is needed for converting adapter handles to loader handles size_t sizeret = 0; if (pPropSizeRet == NULL) @@ -3312,41 +2473,6 @@ __urdlllocal ur_result_t UR_APICALL urKernelGetInfo( if (UR_RESULT_SUCCESS != result) return result; - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_KERNEL_INFO_CONTEXT: { - ur_context_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_context_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_context_factory.getInstance(handles[i], - dditable)); - } - } - } break; - case UR_KERNEL_INFO_PROGRAM: { - ur_program_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_program_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_program_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -3371,18 +2497,12 @@ __urdlllocal ur_result_t UR_APICALL urKernelGetGroupInfo( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = 
reinterpret_cast(hKernel)->dditable; - auto pfnGetGroupInfo = dditable->ur.Kernel.pfnGetGroupInfo; + auto *dditable = *reinterpret_cast(hKernel); + + auto *pfnGetGroupInfo = dditable->Kernel.pfnGetGroupInfo; if (nullptr == pfnGetGroupInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnGetGroupInfo(hKernel, hDevice, propName, propSize, pPropValue, pPropSizeRet); @@ -3411,18 +2531,12 @@ __urdlllocal ur_result_t UR_APICALL urKernelGetSubGroupInfo( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnGetSubGroupInfo = dditable->ur.Kernel.pfnGetSubGroupInfo; + auto *dditable = *reinterpret_cast(hKernel); + + auto *pfnGetSubGroupInfo = dditable->Kernel.pfnGetSubGroupInfo; if (nullptr == pfnGetSubGroupInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnGetSubGroupInfo(hKernel, hDevice, propName, propSize, pPropValue, pPropSizeRet); @@ -3439,21 +2553,15 @@ __urdlllocal ur_result_t UR_APICALL urKernelRetain( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnRetain = dditable->ur.Kernel.pfnRetain; + auto *dditable = *reinterpret_cast(hKernel); + + auto *pfnRetain = dditable->Kernel.pfnRetain; if (nullptr == pfnRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - // forward to device-platform result = pfnRetain(hKernel); - // increment 
refcount of handle - context->factories.ur_kernel_factory.retain(hKernel); - return result; } @@ -3466,21 +2574,15 @@ __urdlllocal ur_result_t UR_APICALL urKernelRelease( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnRelease = dditable->ur.Kernel.pfnRelease; + auto *dditable = *reinterpret_cast(hKernel); + + auto *pfnRelease = dditable->Kernel.pfnRelease; if (nullptr == pfnRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - // forward to device-platform result = pfnRelease(hKernel); - // release loader handle - context->factories.ur_kernel_factory.release(hKernel); - return result; } @@ -3500,15 +2602,12 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgPointer( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnSetArgPointer = dditable->ur.Kernel.pfnSetArgPointer; + auto *dditable = *reinterpret_cast(hKernel); + + auto *pfnSetArgPointer = dditable->Kernel.pfnSetArgPointer; if (nullptr == pfnSetArgPointer) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - // forward to device-platform result = pfnSetArgPointer(hKernel, argIndex, pProperties, pArgValue); @@ -3533,15 +2632,12 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetExecInfo( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnSetExecInfo = dditable->ur.Kernel.pfnSetExecInfo; + auto *dditable = *reinterpret_cast(hKernel); + + auto *pfnSetExecInfo = dditable->Kernel.pfnSetExecInfo; if (nullptr == pfnSetExecInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = 
reinterpret_cast(hKernel)->handle; - // forward to device-platform result = pfnSetExecInfo(hKernel, propName, propSize, pProperties, pPropValue); @@ -3563,18 +2659,12 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgSampler( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnSetArgSampler = dditable->ur.Kernel.pfnSetArgSampler; + auto *dditable = *reinterpret_cast(hKernel); + + auto *pfnSetArgSampler = dditable->Kernel.pfnSetArgSampler; if (nullptr == pfnSetArgSampler) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - - // convert loader handle to platform handle - hArgValue = reinterpret_cast(hArgValue)->handle; - // forward to device-platform result = pfnSetArgSampler(hKernel, argIndex, pProperties, hArgValue); @@ -3596,20 +2686,12 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgMemObj( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnSetArgMemObj = dditable->ur.Kernel.pfnSetArgMemObj; + auto *dditable = *reinterpret_cast(hKernel); + + auto *pfnSetArgMemObj = dditable->Kernel.pfnSetArgMemObj; if (nullptr == pfnSetArgMemObj) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - - // convert loader handle to platform handle - hArgValue = (hArgValue) - ? 
reinterpret_cast(hArgValue)->handle - : nullptr; - // forward to device-platform result = pfnSetArgMemObj(hKernel, argIndex, pProperties, hArgValue); @@ -3629,16 +2711,13 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetSpecializationConstants( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnSetSpecializationConstants = - dditable->ur.Kernel.pfnSetSpecializationConstants; + auto *dditable = *reinterpret_cast(hKernel); + + auto *pfnSetSpecializationConstants = + dditable->Kernel.pfnSetSpecializationConstants; if (nullptr == pfnSetSpecializationConstants) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - // forward to device-platform result = pfnSetSpecializationConstants(hKernel, count, pSpecConstants); @@ -3656,15 +2735,12 @@ __urdlllocal ur_result_t UR_APICALL urKernelGetNativeHandle( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnGetNativeHandle = dditable->ur.Kernel.pfnGetNativeHandle; + auto *dditable = *reinterpret_cast(hKernel); + + auto *pfnGetNativeHandle = dditable->Kernel.pfnGetNativeHandle; if (nullptr == pfnGetNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - // forward to device-platform result = pfnGetNativeHandle(hKernel, phNativeKernel); @@ -3691,21 +2767,12 @@ __urdlllocal ur_result_t UR_APICALL urKernelCreateWithNativeHandle( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreateWithNativeHandle = - dditable->ur.Kernel.pfnCreateWithNativeHandle; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnCreateWithNativeHandle = 
dditable->Kernel.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hProgram = (hProgram) - ? reinterpret_cast(hProgram)->handle - : nullptr; - // forward to device-platform result = pfnCreateWithNativeHandle(hNativeKernel, hContext, hProgram, pProperties, phKernel); @@ -3713,14 +2780,6 @@ __urdlllocal ur_result_t UR_APICALL urKernelCreateWithNativeHandle( if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phKernel = reinterpret_cast( - context->factories.ur_kernel_factory.getInstance(*phKernel, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -3748,19 +2807,13 @@ __urdlllocal ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnGetSuggestedLocalWorkSize = - dditable->ur.Kernel.pfnGetSuggestedLocalWorkSize; + auto *dditable = *reinterpret_cast(hKernel); + + auto *pfnGetSuggestedLocalWorkSize = + dditable->Kernel.pfnGetSuggestedLocalWorkSize; if (nullptr == pfnGetSuggestedLocalWorkSize) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - // forward to device-platform result = pfnGetSuggestedLocalWorkSize(hKernel, hQueue, numWorkDim, pGlobalWorkOffset, pGlobalWorkSize, @@ -3787,15 +2840,12 @@ __urdlllocal ur_result_t UR_APICALL urQueueGetInfo( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnGetInfo = 
dditable->ur.Queue.pfnGetInfo; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnGetInfo = dditable->Queue.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - // this value is needed for converting adapter handles to loader handles size_t sizeret = 0; if (pPropSizeRet == NULL) @@ -3807,53 +2857,6 @@ __urdlllocal ur_result_t UR_APICALL urQueueGetInfo( if (UR_RESULT_SUCCESS != result) return result; - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_QUEUE_INFO_CONTEXT: { - ur_context_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_context_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_context_factory.getInstance(handles[i], - dditable)); - } - } - } break; - case UR_QUEUE_INFO_DEVICE: { - ur_device_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_device_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_device_factory.getInstance(handles[i], - dditable)); - } - } - } break; - case UR_QUEUE_INFO_DEVICE_DEFAULT: { - ur_queue_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_queue_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_queue_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -3872,32 +2875,18 @@ __urdlllocal ur_result_t UR_APICALL urQueueCreate( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto 
dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreate = dditable->ur.Queue.pfnCreate; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnCreate = dditable->Queue.pfnCreate; if (nullptr == pfnCreate) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnCreate(hContext, hDevice, pProperties, phQueue); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phQueue = reinterpret_cast( - context->factories.ur_queue_factory.getInstance(*phQueue, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -3910,21 +2899,15 @@ __urdlllocal ur_result_t UR_APICALL urQueueRetain( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnRetain = dditable->ur.Queue.pfnRetain; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnRetain = dditable->Queue.pfnRetain; if (nullptr == pfnRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - // forward to device-platform result = pfnRetain(hQueue); - // increment refcount of handle - context->factories.ur_queue_factory.retain(hQueue); - return result; } @@ -3937,21 +2920,15 @@ __urdlllocal ur_result_t UR_APICALL urQueueRelease( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnRelease = dditable->ur.Queue.pfnRelease; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnRelease = dditable->Queue.pfnRelease; if (nullptr == pfnRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle 
to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - // forward to device-platform result = pfnRelease(hQueue); - // release loader handle - context->factories.ur_queue_factory.release(hQueue); - return result; } @@ -3968,15 +2945,12 @@ __urdlllocal ur_result_t UR_APICALL urQueueGetNativeHandle( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnGetNativeHandle = dditable->ur.Queue.pfnGetNativeHandle; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnGetNativeHandle = dditable->Queue.pfnGetNativeHandle; if (nullptr == pfnGetNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - // forward to device-platform result = pfnGetNativeHandle(hQueue, pDesc, phNativeQueue); @@ -4003,19 +2977,12 @@ __urdlllocal ur_result_t UR_APICALL urQueueCreateWithNativeHandle( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreateWithNativeHandle = dditable->ur.Queue.pfnCreateWithNativeHandle; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnCreateWithNativeHandle = dditable->Queue.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = (hDevice) ? 
reinterpret_cast(hDevice)->handle - : nullptr; - // forward to device-platform result = pfnCreateWithNativeHandle(hNativeQueue, hContext, hDevice, pProperties, phQueue); @@ -4023,14 +2990,6 @@ __urdlllocal ur_result_t UR_APICALL urQueueCreateWithNativeHandle( if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phQueue = reinterpret_cast( - context->factories.ur_queue_factory.getInstance(*phQueue, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -4043,15 +3002,12 @@ __urdlllocal ur_result_t UR_APICALL urQueueFinish( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnFinish = dditable->ur.Queue.pfnFinish; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnFinish = dditable->Queue.pfnFinish; if (nullptr == pfnFinish) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - // forward to device-platform result = pfnFinish(hQueue); @@ -4067,15 +3023,12 @@ __urdlllocal ur_result_t UR_APICALL urQueueFlush( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnFlush = dditable->ur.Queue.pfnFlush; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnFlush = dditable->Queue.pfnFlush; if (nullptr == pfnFlush) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - // forward to device-platform result = pfnFlush(hQueue); @@ -4100,15 +3053,12 @@ __urdlllocal ur_result_t UR_APICALL urEventGetInfo( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hEvent)->dditable; - auto pfnGetInfo = dditable->ur.Event.pfnGetInfo; + 
auto *dditable = *reinterpret_cast(hEvent); + + auto *pfnGetInfo = dditable->Event.pfnGetInfo; if (nullptr == pfnGetInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hEvent = reinterpret_cast(hEvent)->handle; - // this value is needed for converting adapter handles to loader handles size_t sizeret = 0; if (pPropSizeRet == NULL) @@ -4120,41 +3070,6 @@ __urdlllocal ur_result_t UR_APICALL urEventGetInfo( if (UR_RESULT_SUCCESS != result) return result; - try { - if (pPropValue != nullptr) { - switch (propName) { - case UR_EVENT_INFO_COMMAND_QUEUE: { - ur_queue_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_queue_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_queue_factory.getInstance(handles[i], - dditable)); - } - } - } break; - case UR_EVENT_INFO_CONTEXT: { - ur_context_handle_t *handles = - reinterpret_cast(pPropValue); - size_t nelements = *pPropSizeRet / sizeof(ur_context_handle_t); - for (size_t i = 0; i < nelements; ++i) { - if (handles[i] != nullptr) { - handles[i] = reinterpret_cast( - context->factories.ur_context_factory.getInstance(handles[i], - dditable)); - } - } - } break; - default: { - } break; - } - } - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -4177,15 +3092,12 @@ __urdlllocal ur_result_t UR_APICALL urEventGetProfilingInfo( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hEvent)->dditable; - auto pfnGetProfilingInfo = dditable->ur.Event.pfnGetProfilingInfo; + auto *dditable = *reinterpret_cast(hEvent); + + auto *pfnGetProfilingInfo = dditable->Event.pfnGetProfilingInfo; if (nullptr == pfnGetProfilingInfo) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hEvent = 
reinterpret_cast(hEvent)->handle; - // forward to device-platform result = pfnGetProfilingInfo(hEvent, propName, propSize, pPropValue, pPropSizeRet); @@ -4205,21 +3117,14 @@ __urdlllocal ur_result_t UR_APICALL urEventWait( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(*phEventWaitList)->dditable; - auto pfnWait = dditable->ur.Event.pfnWait; + auto *dditable = *reinterpret_cast(phEventWaitList[0]); + + auto *pfnWait = dditable->Event.pfnWait; if (nullptr == pfnWait) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handles to platform handles - auto phEventWaitListLocal = std::vector(numEvents); - for (size_t i = 0; i < numEvents; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnWait(numEvents, phEventWaitListLocal.data()); + result = pfnWait(numEvents, phEventWaitList); return result; } @@ -4233,21 +3138,15 @@ __urdlllocal ur_result_t UR_APICALL urEventRetain( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hEvent)->dditable; - auto pfnRetain = dditable->ur.Event.pfnRetain; + auto *dditable = *reinterpret_cast(hEvent); + + auto *pfnRetain = dditable->Event.pfnRetain; if (nullptr == pfnRetain) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hEvent = reinterpret_cast(hEvent)->handle; - // forward to device-platform result = pfnRetain(hEvent); - // increment refcount of handle - context->factories.ur_event_factory.retain(hEvent); - return result; } @@ -4260,21 +3159,15 @@ __urdlllocal ur_result_t UR_APICALL urEventRelease( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hEvent)->dditable; - auto pfnRelease = dditable->ur.Event.pfnRelease; + auto *dditable = *reinterpret_cast(hEvent); + + auto *pfnRelease = 
dditable->Event.pfnRelease; if (nullptr == pfnRelease) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hEvent = reinterpret_cast(hEvent)->handle; - // forward to device-platform result = pfnRelease(hEvent); - // release loader handle - context->factories.ur_event_factory.release(hEvent); - return result; } @@ -4289,15 +3182,12 @@ __urdlllocal ur_result_t UR_APICALL urEventGetNativeHandle( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hEvent)->dditable; - auto pfnGetNativeHandle = dditable->ur.Event.pfnGetNativeHandle; + auto *dditable = *reinterpret_cast(hEvent); + + auto *pfnGetNativeHandle = dditable->Event.pfnGetNativeHandle; if (nullptr == pfnGetNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hEvent = reinterpret_cast(hEvent)->handle; - // forward to device-platform result = pfnGetNativeHandle(hEvent, phNativeEvent); @@ -4322,15 +3212,12 @@ __urdlllocal ur_result_t UR_APICALL urEventCreateWithNativeHandle( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreateWithNativeHandle = dditable->ur.Event.pfnCreateWithNativeHandle; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnCreateWithNativeHandle = dditable->Event.pfnCreateWithNativeHandle; if (nullptr == pfnCreateWithNativeHandle) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = pfnCreateWithNativeHandle(hNativeEvent, hContext, pProperties, phEvent); @@ -4338,32 +3225,9 @@ __urdlllocal ur_result_t UR_APICALL urEventCreateWithNativeHandle( if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phEvent = reinterpret_cast( - 
context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } -namespace { -struct event_callback_wrapper_data_t { - ur_event_callback_t fn; - ur_event_handle_t event; - void *userData; -}; - -void event_callback_wrapper([[maybe_unused]] ur_event_handle_t hEvent, - ur_execution_info_t execStatus, void *pUserData) { - auto *wrapper = reinterpret_cast(pUserData); - (wrapper->fn)(wrapper->event, execStatus, wrapper->userData); - delete wrapper; -} -} // namespace - /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEventSetCallback __urdlllocal ur_result_t UR_APICALL urEventSetCallback( @@ -4379,22 +3243,12 @@ __urdlllocal ur_result_t UR_APICALL urEventSetCallback( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hEvent)->dditable; - auto pfnSetCallback = dditable->ur.Event.pfnSetCallback; + auto *dditable = *reinterpret_cast(hEvent); + + auto *pfnSetCallback = dditable->Event.pfnSetCallback; if (nullptr == pfnSetCallback) return UR_RESULT_ERROR_UNINITIALIZED; - // Replace the callback with a wrapper function that gives the callback the - // loader event rather than a backend-specific event - auto *wrapper_data = - new event_callback_wrapper_data_t{pfnNotify, hEvent, pUserData}; - pUserData = wrapper_data; - pfnNotify = event_callback_wrapper; - - // convert loader handle to platform handle - hEvent = reinterpret_cast(hEvent)->handle; - // forward to device-platform result = pfnSetCallback(hEvent, execStatus, pfnNotify, pUserData); @@ -4438,42 +3292,21 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnKernelLaunch = dditable->ur.Enqueue.pfnKernelLaunch; 
+ auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnKernelLaunch = dditable->Enqueue.pfnKernelLaunch; if (nullptr == pfnKernelLaunch) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnKernelLaunch(hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -4499,38 +3332,19 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWait( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnEventsWait = dditable->ur.Enqueue.pfnEventsWait; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnEventsWait = dditable->Enqueue.pfnEventsWait; if (nullptr == pfnEventsWait) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 
0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnEventsWait(hQueue, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + result = pfnEventsWait(hQueue, numEventsInWaitList, phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -4556,38 +3370,20 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnEventsWaitWithBarrier = dditable->ur.Enqueue.pfnEventsWaitWithBarrier; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnEventsWaitWithBarrier = dditable->Enqueue.pfnEventsWaitWithBarrier; if (nullptr == pfnEventsWaitWithBarrier) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnEventsWaitWithBarrier(hQueue, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -4622,42 +3418,20 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferRead( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemBufferRead = dditable->ur.Enqueue.pfnMemBufferRead; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnMemBufferRead = dditable->Enqueue.pfnMemBufferRead; if (nullptr == pfnMemBufferRead) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnMemBufferRead(hQueue, hBuffer, blockingRead, offset, size, pDst, - numEventsInWaitList, phEventWaitListLocal.data(), - phEvent); + numEventsInWaitList, phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -4692,42 +3466,20 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWrite( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemBufferWrite = dditable->ur.Enqueue.pfnMemBufferWrite; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnMemBufferWrite = dditable->Enqueue.pfnMemBufferWrite; if (nullptr == pfnMemBufferWrite) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnMemBufferWrite(hQueue, hBuffer, blockingWrite, offset, size, pSrc, - numEventsInWaitList, phEventWaitListLocal.data(), - phEvent); + numEventsInWaitList, phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -4774,43 +3526,22 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferReadRect( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemBufferReadRect = dditable->ur.Enqueue.pfnMemBufferReadRect; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnMemBufferReadRect = dditable->Enqueue.pfnMemBufferReadRect; if (nullptr == pfnMemBufferReadRect) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnMemBufferReadRect( hQueue, hBuffer, blockingRead, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, - numEventsInWaitList, phEventWaitListLocal.data(), phEvent); + numEventsInWaitList, phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -4858,43 +3589,22 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemBufferWriteRect = dditable->ur.Enqueue.pfnMemBufferWriteRect; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnMemBufferWriteRect = dditable->Enqueue.pfnMemBufferWriteRect; if (nullptr == pfnMemBufferWriteRect) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnMemBufferWriteRect( hQueue, hBuffer, blockingWrite, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, - numEventsInWaitList, phEventWaitListLocal.data(), phEvent); + numEventsInWaitList, phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -4929,45 +3639,21 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopy( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemBufferCopy = dditable->ur.Enqueue.pfnMemBufferCopy; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnMemBufferCopy = dditable->Enqueue.pfnMemBufferCopy; if (nullptr == pfnMemBufferCopy) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hBufferSrc = reinterpret_cast(hBufferSrc)->handle; - - // convert loader handle to platform handle - hBufferDst = reinterpret_cast(hBufferDst)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnMemBufferCopy(hQueue, hBufferSrc, hBufferDst, srcOffset, - dstOffset, size, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + result = + pfnMemBufferCopy(hQueue, hBufferSrc, hBufferDst, srcOffset, dstOffset, + size, numEventsInWaitList, phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -5010,46 +3696,22 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemBufferCopyRect = dditable->ur.Enqueue.pfnMemBufferCopyRect; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnMemBufferCopyRect = dditable->Enqueue.pfnMemBufferCopyRect; if (nullptr == pfnMemBufferCopyRect) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hBufferSrc = reinterpret_cast(hBufferSrc)->handle; - - // convert loader handle to platform handle - hBufferDst = reinterpret_cast(hBufferDst)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnMemBufferCopyRect(hQueue, hBufferSrc, hBufferDst, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -5084,42 +3746,21 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferFill( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemBufferFill = dditable->ur.Enqueue.pfnMemBufferFill; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnMemBufferFill = dditable->Enqueue.pfnMemBufferFill; if (nullptr == pfnMemBufferFill) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnMemBufferFill(hQueue, hBuffer, pPattern, patternSize, offset, - size, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + result = + pfnMemBufferFill(hQueue, hBuffer, pPattern, patternSize, offset, size, + numEventsInWaitList, phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -5159,42 +3800,21 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageRead( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemImageRead = dditable->ur.Enqueue.pfnMemImageRead; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnMemImageRead = dditable->Enqueue.pfnMemImageRead; if (nullptr == pfnMemImageRead) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hImage = reinterpret_cast(hImage)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnMemImageRead(hQueue, hImage, blockingRead, origin, region, rowPitch, slicePitch, pDst, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -5234,42 +3854,21 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageWrite( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemImageWrite = dditable->ur.Enqueue.pfnMemImageWrite; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnMemImageWrite = dditable->Enqueue.pfnMemImageWrite; if (nullptr == pfnMemImageWrite) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hImage = reinterpret_cast(hImage)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnMemImageWrite(hQueue, hImage, blockingWrite, origin, region, rowPitch, slicePitch, pSrc, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -5307,45 +3906,21 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemImageCopy( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemImageCopy = dditable->ur.Enqueue.pfnMemImageCopy; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnMemImageCopy = dditable->Enqueue.pfnMemImageCopy; if (nullptr == pfnMemImageCopy) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hImageSrc = reinterpret_cast(hImageSrc)->handle; - - // convert loader handle to platform handle - hImageDst = reinterpret_cast(hImageDst)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnMemImageCopy(hQueue, hImageSrc, hImageDst, srcOrigin, dstOrigin, - region, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + result = + pfnMemImageCopy(hQueue, hImageSrc, hImageDst, srcOrigin, dstOrigin, + region, numEventsInWaitList, phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -5383,42 +3958,21 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemBufferMap( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemBufferMap = dditable->ur.Enqueue.pfnMemBufferMap; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnMemBufferMap = dditable->Enqueue.pfnMemBufferMap; if (nullptr == pfnMemBufferMap) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnMemBufferMap(hQueue, hBuffer, blockingMap, mapFlags, offset, size, - numEventsInWaitList, phEventWaitListLocal.data(), - phEvent, ppRetMap); + result = + pfnMemBufferMap(hQueue, hBuffer, blockingMap, mapFlags, offset, size, + numEventsInWaitList, phEventWaitList, phEvent, ppRetMap); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -5447,41 +4001,20 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueMemUnmap( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnMemUnmap = dditable->ur.Enqueue.pfnMemUnmap; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnMemUnmap = dditable->Enqueue.pfnMemUnmap; if (nullptr == pfnMemUnmap) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hMem = reinterpret_cast(hMem)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnMemUnmap(hQueue, hMem, pMappedPtr, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -5515,39 +4048,20 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnUSMFill = dditable->ur.Enqueue.pfnUSMFill; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnUSMFill = dditable->Enqueue.pfnUSMFill; if (nullptr == pfnUSMFill) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = - pfnUSMFill(hQueue, pMem, patternSize, pPattern, size, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + result = pfnUSMFill(hQueue, pMem, patternSize, pPattern, size, + numEventsInWaitList, phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -5580,38 +4094,20 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnUSMMemcpy = dditable->ur.Enqueue.pfnUSMMemcpy; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnUSMMemcpy = dditable->Enqueue.pfnUSMMemcpy; if (nullptr == pfnUSMMemcpy) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnUSMMemcpy(hQueue, blocking, pDst, pSrc, size, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -5642,38 +4138,20 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMPrefetch( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnUSMPrefetch = dditable->ur.Enqueue.pfnUSMPrefetch; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnUSMPrefetch = dditable->Enqueue.pfnUSMPrefetch; if (nullptr == pfnUSMPrefetch) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnUSMPrefetch(hQueue, pMem, size, flags, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -5696,15 +4174,12 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMAdvise( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnUSMAdvise = dditable->ur.Enqueue.pfnUSMAdvise; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnUSMAdvise = dditable->Enqueue.pfnUSMAdvise; if (nullptr == pfnUSMAdvise) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - // forward to device-platform result = pfnUSMAdvise(hQueue, pMem, size, advice, phEvent); @@ -5712,14 +4187,6 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMAdvise( // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -5758,39 +4225,20 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMFill2D( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnUSMFill2D = dditable->ur.Enqueue.pfnUSMFill2D; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnUSMFill2D = dditable->Enqueue.pfnUSMFill2D; if (nullptr == pfnUSMFill2D) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = - pfnUSMFill2D(hQueue, pMem, pitch, patternSize, pPattern, width, height, - numEventsInWaitList, phEventWaitListLocal.data(), phEvent); + result = pfnUSMFill2D(hQueue, pMem, pitch, patternSize, pPattern, width, + height, numEventsInWaitList, phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -5830,39 +4278,21 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnUSMMemcpy2D = dditable->ur.Enqueue.pfnUSMMemcpy2D; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnUSMMemcpy2D = dditable->Enqueue.pfnUSMMemcpy2D; if (nullptr == pfnUSMMemcpy2D) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnUSMMemcpy2D(hQueue, blocking, pDst, dstPitch, pSrc, srcPitch, - width, height, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + result = + pfnUSMMemcpy2D(hQueue, blocking, pDst, dstPitch, pSrc, srcPitch, width, + height, numEventsInWaitList, phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -5899,43 +4329,22 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnDeviceGlobalVariableWrite = - dditable->ur.Enqueue.pfnDeviceGlobalVariableWrite; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnDeviceGlobalVariableWrite = + dditable->Enqueue.pfnDeviceGlobalVariableWrite; if (nullptr == pfnDeviceGlobalVariableWrite) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnDeviceGlobalVariableWrite( hQueue, hProgram, name, blockingWrite, count, offset, pSrc, - numEventsInWaitList, phEventWaitListLocal.data(), phEvent); + numEventsInWaitList, phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -5972,43 +4381,22 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnDeviceGlobalVariableRead = - dditable->ur.Enqueue.pfnDeviceGlobalVariableRead; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnDeviceGlobalVariableRead = + dditable->Enqueue.pfnDeviceGlobalVariableRead; if (nullptr == pfnDeviceGlobalVariableRead) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnDeviceGlobalVariableRead(hQueue, hProgram, name, blockingRead, count, offset, pDst, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -6047,42 +4435,20 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueReadHostPipe( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnReadHostPipe = dditable->ur.Enqueue.pfnReadHostPipe; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnReadHostPipe = dditable->Enqueue.pfnReadHostPipe; if (nullptr == pfnReadHostPipe) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnReadHostPipe(hQueue, hProgram, pipe_symbol, blocking, pDst, size, - numEventsInWaitList, phEventWaitListLocal.data(), - phEvent); + numEventsInWaitList, phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -6122,42 +4488,20 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueWriteHostPipe( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnWriteHostPipe = dditable->ur.Enqueue.pfnWriteHostPipe; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnWriteHostPipe = dditable->Enqueue.pfnWriteHostPipe; if (nullptr == pfnWriteHostPipe) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnWriteHostPipe(hQueue, hProgram, pipe_symbol, blocking, pSrc, size, - numEventsInWaitList, phEventWaitListLocal.data(), - phEvent); + numEventsInWaitList, phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -6187,22 +4531,12 @@ __urdlllocal ur_result_t UR_APICALL urUSMPitchedAllocExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnPitchedAllocExp = dditable->ur.USMExp.pfnPitchedAllocExp; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnPitchedAllocExp = dditable->USMExp.pfnPitchedAllocExp; if (nullptr == pfnPitchedAllocExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - pool = - (pool) ? 
reinterpret_cast(pool)->handle : nullptr; - // forward to device-platform result = pfnPitchedAllocExp(hContext, hDevice, pUSMDesc, pool, widthInBytes, height, elementSizeBytes, ppMem, pResultPitch); @@ -6224,19 +4558,13 @@ urBindlessImagesUnsampledImageHandleDestroyExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnUnsampledImageHandleDestroyExp = - dditable->ur.BindlessImagesExp.pfnUnsampledImageHandleDestroyExp; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnUnsampledImageHandleDestroyExp = + dditable->BindlessImagesExp.pfnUnsampledImageHandleDestroyExp; if (nullptr == pfnUnsampledImageHandleDestroyExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnUnsampledImageHandleDestroyExp(hContext, hDevice, hImage); @@ -6257,19 +4585,13 @@ urBindlessImagesSampledImageHandleDestroyExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnSampledImageHandleDestroyExp = - dditable->ur.BindlessImagesExp.pfnSampledImageHandleDestroyExp; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnSampledImageHandleDestroyExp = + dditable->BindlessImagesExp.pfnSampledImageHandleDestroyExp; if (nullptr == pfnSampledImageHandleDestroyExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnSampledImageHandleDestroyExp(hContext, hDevice, hImage); @@ -6293,18 +4615,12 @@ __urdlllocal ur_result_t UR_APICALL 
urBindlessImagesImageAllocateExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnImageAllocateExp = dditable->ur.BindlessImagesExp.pfnImageAllocateExp; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnImageAllocateExp = dditable->BindlessImagesExp.pfnImageAllocateExp; if (nullptr == pfnImageAllocateExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnImageAllocateExp(hContext, hDevice, pImageFormat, pImageDesc, phImageMem); @@ -6328,18 +4644,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageFreeExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnImageFreeExp = dditable->ur.BindlessImagesExp.pfnImageFreeExp; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnImageFreeExp = dditable->BindlessImagesExp.pfnImageFreeExp; if (nullptr == pfnImageFreeExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnImageFreeExp(hContext, hDevice, hImageMem); @@ -6365,19 +4675,13 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesUnsampledImageCreateExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnUnsampledImageCreateExp = - dditable->ur.BindlessImagesExp.pfnUnsampledImageCreateExp; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnUnsampledImageCreateExp = + 
dditable->BindlessImagesExp.pfnUnsampledImageCreateExp; if (nullptr == pfnUnsampledImageCreateExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnUnsampledImageCreateExp(hContext, hDevice, hImageMem, pImageFormat, pImageDesc, phImage); @@ -6409,22 +4713,13 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSampledImageCreateExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnSampledImageCreateExp = - dditable->ur.BindlessImagesExp.pfnSampledImageCreateExp; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnSampledImageCreateExp = + dditable->BindlessImagesExp.pfnSampledImageCreateExp; if (nullptr == pfnSampledImageCreateExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - hSampler = reinterpret_cast(hSampler)->handle; - // forward to device-platform result = pfnSampledImageCreateExp(hContext, hDevice, hImageMem, pImageFormat, pImageDesc, hSampler, phImage); @@ -6473,40 +4768,21 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageCopyExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnImageCopyExp = dditable->ur.BindlessImagesExp.pfnImageCopyExp; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnImageCopyExp = dditable->BindlessImagesExp.pfnImageCopyExp; if (nullptr == pfnImageCopyExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - 
hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnImageCopyExp(hQueue, pSrc, pDst, pSrcImageDesc, pDstImageDesc, pSrcImageFormat, pDstImageFormat, pCopyRegion, - imageCopyFlags, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + imageCopyFlags, numEventsInWaitList, phEventWaitList, + phEvent); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -6527,15 +4803,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImageGetInfoExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnImageGetInfoExp = dditable->ur.BindlessImagesExp.pfnImageGetInfoExp; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnImageGetInfoExp = dditable->BindlessImagesExp.pfnImageGetInfoExp; if (nullptr == pfnImageGetInfoExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = pfnImageGetInfoExp(hContext, hImageMem, propName, pPropValue, pPropSizeRet); @@ -6560,19 +4833,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesMipmapGetLevelExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnMipmapGetLevelExp = - dditable->ur.BindlessImagesExp.pfnMipmapGetLevelExp; + auto *dditable 
= *reinterpret_cast(hContext); + + auto *pfnMipmapGetLevelExp = dditable->BindlessImagesExp.pfnMipmapGetLevelExp; if (nullptr == pfnMipmapGetLevelExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnMipmapGetLevelExp(hContext, hDevice, hImageMem, mipmapLevel, phImageMem); @@ -6596,18 +4862,12 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesMipmapFreeExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnMipmapFreeExp = dditable->ur.BindlessImagesExp.pfnMipmapFreeExp; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnMipmapFreeExp = dditable->BindlessImagesExp.pfnMipmapFreeExp; if (nullptr == pfnMipmapFreeExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnMipmapFreeExp(hContext, hDevice, hMem); @@ -6633,19 +4893,13 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnImportExternalMemoryExp = - dditable->ur.BindlessImagesExp.pfnImportExternalMemoryExp; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnImportExternalMemoryExp = + dditable->BindlessImagesExp.pfnImportExternalMemoryExp; if (nullptr == pfnImportExternalMemoryExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - 
hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnImportExternalMemoryExp(hContext, hDevice, size, memHandleType, pExternalMemDesc, phExternalMem); @@ -6653,15 +4907,6 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp( if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phExternalMem = reinterpret_cast( - context->factories.ur_exp_external_mem_factory.getInstance( - *phExternalMem, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -6684,23 +4929,13 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesMapExternalArrayExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnMapExternalArrayExp = - dditable->ur.BindlessImagesExp.pfnMapExternalArrayExp; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnMapExternalArrayExp = + dditable->BindlessImagesExp.pfnMapExternalArrayExp; if (nullptr == pfnMapExternalArrayExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - hExternalMem = - reinterpret_cast(hExternalMem)->handle; - // forward to device-platform result = pfnMapExternalArrayExp(hContext, hDevice, pImageFormat, pImageDesc, hExternalMem, phImageMem); @@ -6730,23 +4965,13 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesMapExternalLinearMemoryExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnMapExternalLinearMemoryExp = - dditable->ur.BindlessImagesExp.pfnMapExternalLinearMemoryExp; + auto *dditable = 
*reinterpret_cast(hContext); + + auto *pfnMapExternalLinearMemoryExp = + dditable->BindlessImagesExp.pfnMapExternalLinearMemoryExp; if (nullptr == pfnMapExternalLinearMemoryExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - hExternalMem = - reinterpret_cast(hExternalMem)->handle; - // forward to device-platform result = pfnMapExternalLinearMemoryExp(hContext, hDevice, offset, size, hExternalMem, ppRetMem); @@ -6767,29 +4992,16 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesReleaseExternalMemoryExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnReleaseExternalMemoryExp = - dditable->ur.BindlessImagesExp.pfnReleaseExternalMemoryExp; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnReleaseExternalMemoryExp = + dditable->BindlessImagesExp.pfnReleaseExternalMemoryExp; if (nullptr == pfnReleaseExternalMemoryExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - hExternalMem = - reinterpret_cast(hExternalMem)->handle; - // forward to device-platform result = pfnReleaseExternalMemoryExp(hContext, hDevice, hExternalMem); - // release loader handle - context->factories.ur_exp_external_mem_factory.release(hExternalMem); - return result; } @@ -6810,19 +5022,13 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto 
pfnImportExternalSemaphoreExp = - dditable->ur.BindlessImagesExp.pfnImportExternalSemaphoreExp; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnImportExternalSemaphoreExp = + dditable->BindlessImagesExp.pfnImportExternalSemaphoreExp; if (nullptr == pfnImportExternalSemaphoreExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnImportExternalSemaphoreExp(hContext, hDevice, semHandleType, pExternalSemaphoreDesc, @@ -6831,15 +5037,6 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImportExternalSemaphoreExp( if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phExternalSemaphore = reinterpret_cast( - context->factories.ur_exp_external_semaphore_factory.getInstance( - *phExternalSemaphore, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -6856,32 +5053,17 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesReleaseExternalSemaphoreExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnReleaseExternalSemaphoreExp = - dditable->ur.BindlessImagesExp.pfnReleaseExternalSemaphoreExp; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnReleaseExternalSemaphoreExp = + dditable->BindlessImagesExp.pfnReleaseExternalSemaphoreExp; if (nullptr == pfnReleaseExternalSemaphoreExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // convert loader handle to platform handle - hExternalSemaphore = - reinterpret_cast(hExternalSemaphore) 
- ->handle; - // forward to device-platform result = pfnReleaseExternalSemaphoreExp(hContext, hDevice, hExternalSemaphore); - // release loader handle - context->factories.ur_exp_external_semaphore_factory.release( - hExternalSemaphore); - return result; } @@ -6915,45 +5097,21 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesWaitExternalSemaphoreExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnWaitExternalSemaphoreExp = - dditable->ur.BindlessImagesExp.pfnWaitExternalSemaphoreExp; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnWaitExternalSemaphoreExp = + dditable->BindlessImagesExp.pfnWaitExternalSemaphoreExp; if (nullptr == pfnWaitExternalSemaphoreExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hSemaphore = - reinterpret_cast(hSemaphore) - ->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnWaitExternalSemaphoreExp(hQueue, hSemaphore, hasWaitValue, waitValue, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + phEventWaitList, phEvent); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -6987,45 +5145,21 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesSignalExternalSemaphoreExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer 
table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnSignalExternalSemaphoreExp = - dditable->ur.BindlessImagesExp.pfnSignalExternalSemaphoreExp; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnSignalExternalSemaphoreExp = + dditable->BindlessImagesExp.pfnSignalExternalSemaphoreExp; if (nullptr == pfnSignalExternalSemaphoreExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hSemaphore = - reinterpret_cast(hSemaphore) - ->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnSignalExternalSemaphoreExp(hQueue, hSemaphore, hasSignalValue, signalValue, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + phEventWaitList, phEvent); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -7044,33 +5178,18 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferCreateExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnCreateExp = dditable->ur.CommandBufferExp.pfnCreateExp; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnCreateExp = dditable->CommandBufferExp.pfnCreateExp; if (nullptr == pfnCreateExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handle to platform 
handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnCreateExp(hContext, hDevice, pCommandBufferDesc, phCommandBuffer); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phCommandBuffer = reinterpret_cast( - context->factories.ur_exp_command_buffer_factory.getInstance( - *phCommandBuffer, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -7083,25 +5202,15 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnRetainExp = dditable->ur.CommandBufferExp.pfnRetainExp; + auto *dditable = *reinterpret_cast(hCommandBuffer); + + auto *pfnRetainExp = dditable->CommandBufferExp.pfnRetainExp; if (nullptr == pfnRetainExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - // forward to device-platform result = pfnRetainExp(hCommandBuffer); - // increment refcount of handle - context->factories.ur_exp_command_buffer_factory.retain(hCommandBuffer); - return result; } @@ -7114,25 +5223,15 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferReleaseExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnReleaseExp = dditable->ur.CommandBufferExp.pfnReleaseExp; + auto *dditable = *reinterpret_cast(hCommandBuffer); + + auto *pfnReleaseExp = dditable->CommandBufferExp.pfnReleaseExp; if (nullptr == pfnReleaseExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - // forward to device-platform result = 
pfnReleaseExp(hCommandBuffer); - // release loader handle - context->factories.ur_exp_command_buffer_factory.release(hCommandBuffer); - return result; } @@ -7145,19 +5244,12 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferFinalizeExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnFinalizeExp = dditable->ur.CommandBufferExp.pfnFinalizeExp; + auto *dditable = *reinterpret_cast(hCommandBuffer); + + auto *pfnFinalizeExp = dditable->CommandBufferExp.pfnFinalizeExp; if (nullptr == pfnFinalizeExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - // forward to device-platform result = pfnFinalizeExp(hCommandBuffer); @@ -7214,66 +5306,23 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendKernelLaunchExp = - dditable->ur.CommandBufferExp.pfnAppendKernelLaunchExp; + auto *dditable = *reinterpret_cast(hCommandBuffer); + + auto *pfnAppendKernelLaunchExp = + dditable->CommandBufferExp.pfnAppendKernelLaunchExp; if (nullptr == pfnAppendKernelLaunchExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - - // convert loader handles to platform handles - auto phKernelAlternativesLocal = - std::vector(numKernelAlternatives); - for (size_t i = 0; i < numKernelAlternatives; ++i) - phKernelAlternativesLocal[i] = - reinterpret_cast(phKernelAlternatives[i])->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - 
std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnAppendKernelLaunchExp( hCommandBuffer, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numKernelAlternatives, phKernelAlternativesLocal.data(), + pLocalWorkSize, numKernelAlternatives, phKernelAlternatives, numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, - phEventWaitListLocal.data(), pSyncPoint, phEvent, phCommand); + phEventWaitList, pSyncPoint, phEvent, phCommand); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -7311,55 +5360,22 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMMemcpyExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendUSMMemcpyExp = - dditable->ur.CommandBufferExp.pfnAppendUSMMemcpyExp; + auto *dditable = *reinterpret_cast(hCommandBuffer); + + auto *pfnAppendUSMMemcpyExp = + dditable->CommandBufferExp.pfnAppendUSMMemcpyExp; if (nullptr == pfnAppendUSMMemcpyExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - 
std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnAppendUSMMemcpyExp( - hCommandBuffer, pDst, pSrc, size, numSyncPointsInWaitList, - pSyncPointWaitList, numEventsInWaitList, phEventWaitListLocal.data(), - pSyncPoint, phEvent, phCommand); + result = pfnAppendUSMMemcpyExp(hCommandBuffer, pDst, pSrc, size, + numSyncPointsInWaitList, pSyncPointWaitList, + numEventsInWaitList, phEventWaitList, + pSyncPoint, phEvent, phCommand); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -7399,54 +5415,21 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMFillExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendUSMFillExp = dditable->ur.CommandBufferExp.pfnAppendUSMFillExp; + auto *dditable = *reinterpret_cast(hCommandBuffer); + + auto *pfnAppendUSMFillExp = dditable->CommandBufferExp.pfnAppendUSMFillExp; if (nullptr == pfnAppendUSMFillExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 
0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnAppendUSMFillExp( - hCommandBuffer, pMemory, pPattern, patternSize, size, - numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, - phEventWaitListLocal.data(), pSyncPoint, phEvent, phCommand); + result = pfnAppendUSMFillExp(hCommandBuffer, pMemory, pPattern, patternSize, + size, numSyncPointsInWaitList, + pSyncPointWaitList, numEventsInWaitList, + phEventWaitList, pSyncPoint, phEvent, phCommand); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -7488,61 +5471,22 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendMemBufferCopyExp = - dditable->ur.CommandBufferExp.pfnAppendMemBufferCopyExp; + auto *dditable = *reinterpret_cast(hCommandBuffer); + + auto *pfnAppendMemBufferCopyExp = + dditable->CommandBufferExp.pfnAppendMemBufferCopyExp; if (nullptr == pfnAppendMemBufferCopyExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handle to platform handle - hSrcMem = reinterpret_cast(hSrcMem)->handle; - - // convert 
loader handle to platform handle - hDstMem = reinterpret_cast(hDstMem)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnAppendMemBufferCopyExp( hCommandBuffer, hSrcMem, hDstMem, srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, - phEventWaitListLocal.data(), pSyncPoint, phEvent, phCommand); + phEventWaitList, pSyncPoint, phEvent, phCommand); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -7582,58 +5526,22 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendMemBufferWriteExp = - dditable->ur.CommandBufferExp.pfnAppendMemBufferWriteExp; + auto *dditable = *reinterpret_cast(hCommandBuffer); + + auto *pfnAppendMemBufferWriteExp = + dditable->CommandBufferExp.pfnAppendMemBufferWriteExp; if (nullptr == pfnAppendMemBufferWriteExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handle to 
platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnAppendMemBufferWriteExp( hCommandBuffer, hBuffer, offset, size, pSrc, numSyncPointsInWaitList, - pSyncPointWaitList, numEventsInWaitList, phEventWaitListLocal.data(), - pSyncPoint, phEvent, phCommand); + pSyncPointWaitList, numEventsInWaitList, phEventWaitList, pSyncPoint, + phEvent, phCommand); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -7673,58 +5581,22 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendMemBufferReadExp = - dditable->ur.CommandBufferExp.pfnAppendMemBufferReadExp; + auto *dditable = *reinterpret_cast(hCommandBuffer); + + auto *pfnAppendMemBufferReadExp = + dditable->CommandBufferExp.pfnAppendMemBufferReadExp; if (nullptr == pfnAppendMemBufferReadExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handle 
to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnAppendMemBufferReadExp( hCommandBuffer, hBuffer, offset, size, pDst, numSyncPointsInWaitList, - pSyncPointWaitList, numEventsInWaitList, phEventWaitListLocal.data(), - pSyncPoint, phEvent, phCommand); + pSyncPointWaitList, numEventsInWaitList, phEventWaitList, pSyncPoint, + phEvent, phCommand); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -7774,62 +5646,23 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendMemBufferCopyRectExp = - dditable->ur.CommandBufferExp.pfnAppendMemBufferCopyRectExp; + auto *dditable = *reinterpret_cast(hCommandBuffer); + + auto *pfnAppendMemBufferCopyRectExp = + dditable->CommandBufferExp.pfnAppendMemBufferCopyRectExp; if (nullptr == pfnAppendMemBufferCopyRectExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - 
// convert loader handle to platform handle - hSrcMem = reinterpret_cast(hSrcMem)->handle; - - // convert loader handle to platform handle - hDstMem = reinterpret_cast(hDstMem)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnAppendMemBufferCopyRectExp( hCommandBuffer, hSrcMem, hDstMem, srcOrigin, dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, - phEventWaitListLocal.data(), pSyncPoint, phEvent, phCommand); + phEventWaitList, pSyncPoint, phEvent, phCommand); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -7882,59 +5715,23 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferWriteRectExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendMemBufferWriteRectExp = - dditable->ur.CommandBufferExp.pfnAppendMemBufferWriteRectExp; + auto *dditable = *reinterpret_cast(hCommandBuffer); + + auto *pfnAppendMemBufferWriteRectExp = + dditable->CommandBufferExp.pfnAppendMemBufferWriteRectExp; if (nullptr == 
pfnAppendMemBufferWriteRectExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnAppendMemBufferWriteRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, - phEventWaitListLocal.data(), pSyncPoint, phEvent, phCommand); + phEventWaitList, pSyncPoint, phEvent, phCommand); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -7986,59 +5783,23 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferReadRectExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendMemBufferReadRectExp = - dditable->ur.CommandBufferExp.pfnAppendMemBufferReadRectExp; + auto *dditable = *reinterpret_cast(hCommandBuffer); + + auto *pfnAppendMemBufferReadRectExp = + 
dditable->CommandBufferExp.pfnAppendMemBufferReadRectExp; if (nullptr == pfnAppendMemBufferReadRectExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnAppendMemBufferReadRectExp( hCommandBuffer, hBuffer, bufferOffset, hostOffset, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, - phEventWaitListLocal.data(), pSyncPoint, phEvent, phCommand); + phEventWaitList, pSyncPoint, phEvent, phCommand); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -8080,58 +5841,22 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendMemBufferFillExp = - dditable->ur.CommandBufferExp.pfnAppendMemBufferFillExp; + auto *dditable = 
*reinterpret_cast(hCommandBuffer); + + auto *pfnAppendMemBufferFillExp = + dditable->CommandBufferExp.pfnAppendMemBufferFillExp; if (nullptr == pfnAppendMemBufferFillExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handle to platform handle - hBuffer = reinterpret_cast(hBuffer)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnAppendMemBufferFillExp( hCommandBuffer, hBuffer, pPattern, patternSize, offset, size, numSyncPointsInWaitList, pSyncPointWaitList, numEventsInWaitList, - phEventWaitListLocal.data(), pSyncPoint, phEvent, phCommand); + phEventWaitList, pSyncPoint, phEvent, phCommand); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -8169,55 +5894,22 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendUSMPrefetchExp = - dditable->ur.CommandBufferExp.pfnAppendUSMPrefetchExp; + auto *dditable = 
*reinterpret_cast(hCommandBuffer); + + auto *pfnAppendUSMPrefetchExp = + dditable->CommandBufferExp.pfnAppendUSMPrefetchExp; if (nullptr == pfnAppendUSMPrefetchExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnAppendUSMPrefetchExp( - hCommandBuffer, pMemory, size, flags, numSyncPointsInWaitList, - pSyncPointWaitList, numEventsInWaitList, phEventWaitListLocal.data(), - pSyncPoint, phEvent, phCommand); + result = pfnAppendUSMPrefetchExp(hCommandBuffer, pMemory, size, flags, + numSyncPointsInWaitList, pSyncPointWaitList, + numEventsInWaitList, phEventWaitList, + pSyncPoint, phEvent, phCommand); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -8255,55 +5947,22 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnAppendUSMAdviseExp = - dditable->ur.CommandBufferExp.pfnAppendUSMAdviseExp; + auto *dditable = 
*reinterpret_cast(hCommandBuffer); + + auto *pfnAppendUSMAdviseExp = + dditable->CommandBufferExp.pfnAppendUSMAdviseExp; if (nullptr == pfnAppendUSMAdviseExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnAppendUSMAdviseExp( - hCommandBuffer, pMemory, size, advice, numSyncPointsInWaitList, - pSyncPointWaitList, numEventsInWaitList, phEventWaitListLocal.data(), - pSyncPoint, phEvent, phCommand); + result = pfnAppendUSMAdviseExp(hCommandBuffer, pMemory, size, advice, + numSyncPointsInWaitList, pSyncPointWaitList, + numEventsInWaitList, phEventWaitList, + pSyncPoint, phEvent, phCommand); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - try { - // convert platform handle to loader handle - if (nullptr != phCommand) - *phCommand = reinterpret_cast( - context->factories.ur_exp_command_buffer_command_factory.getInstance( - *phCommand, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -8330,45 +5989,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnEnqueueExp = dditable->ur.CommandBufferExp.pfnEnqueueExp; + auto *dditable = *reinterpret_cast(hCommandBuffer); + + auto 
*pfnEnqueueExp = dditable->CommandBufferExp.pfnEnqueueExp; if (nullptr == pfnEnqueueExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnEnqueueExp(hCommandBuffer, hQueue, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + phEventWaitList, phEvent); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -8381,25 +6014,15 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferRetainCommandExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommand) - ->dditable; - auto pfnRetainCommandExp = dditable->ur.CommandBufferExp.pfnRetainCommandExp; + auto *dditable = *reinterpret_cast(hCommand); + + auto *pfnRetainCommandExp = dditable->CommandBufferExp.pfnRetainCommandExp; if (nullptr == pfnRetainCommandExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommand = - reinterpret_cast(hCommand) - ->handle; - // forward to device-platform result = pfnRetainCommandExp(hCommand); - // increment refcount of handle - context->factories.ur_exp_command_buffer_command_factory.retain(hCommand); - return result; } @@ -8412,26 +6035,15 @@ __urdlllocal ur_result_t UR_APICALL 
urCommandBufferReleaseCommandExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommand) - ->dditable; - auto pfnReleaseCommandExp = - dditable->ur.CommandBufferExp.pfnReleaseCommandExp; + auto *dditable = *reinterpret_cast(hCommand); + + auto *pfnReleaseCommandExp = dditable->CommandBufferExp.pfnReleaseCommandExp; if (nullptr == pfnReleaseCommandExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommand = - reinterpret_cast(hCommand) - ->handle; - // forward to device-platform result = pfnReleaseCommandExp(hCommand); - // release loader handle - context->factories.ur_exp_command_buffer_command_factory.release(hCommand); - return result; } @@ -8447,48 +6059,13 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommand) - ->dditable; - auto pfnUpdateKernelLaunchExp = - dditable->ur.CommandBufferExp.pfnUpdateKernelLaunchExp; + auto *dditable = *reinterpret_cast(hCommand); + + auto *pfnUpdateKernelLaunchExp = + dditable->CommandBufferExp.pfnUpdateKernelLaunchExp; if (nullptr == pfnUpdateKernelLaunchExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommand = - reinterpret_cast(hCommand) - ->handle; - - // Deal with any struct parameters that have handle members we need to - // convert. 
- auto pUpdateKernelLaunchLocal = *pUpdateKernelLaunch; - - if (pUpdateKernelLaunchLocal.hNewKernel) - pUpdateKernelLaunchLocal.hNewKernel = - reinterpret_cast( - pUpdateKernelLaunchLocal.hNewKernel) - ->handle; - - std::vector - pUpdateKernelLaunchpNewMemObjArgList; - for (uint32_t i = 0; i < pUpdateKernelLaunch->numNewMemObjArgs; i++) { - ur_exp_command_buffer_update_memobj_arg_desc_t NewRangeStruct = - pUpdateKernelLaunchLocal.pNewMemObjArgList[i]; - if (NewRangeStruct.hNewMemObjArg) - NewRangeStruct.hNewMemObjArg = - reinterpret_cast(NewRangeStruct.hNewMemObjArg) - ->handle; - - pUpdateKernelLaunchpNewMemObjArgList.push_back(NewRangeStruct); - } - pUpdateKernelLaunchLocal.pNewMemObjArgList = - pUpdateKernelLaunchpNewMemObjArgList.data(); - - // Now that we've converted all the members update the param pointers - pUpdateKernelLaunch = &pUpdateKernelLaunchLocal; - // forward to device-platform result = pfnUpdateKernelLaunchExp(hCommand, pUpdateKernelLaunch); @@ -8506,35 +6083,19 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateSignalEventExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommand) - ->dditable; - auto pfnUpdateSignalEventExp = - dditable->ur.CommandBufferExp.pfnUpdateSignalEventExp; + auto *dditable = *reinterpret_cast(hCommand); + + auto *pfnUpdateSignalEventExp = + dditable->CommandBufferExp.pfnUpdateSignalEventExp; if (nullptr == pfnUpdateSignalEventExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommand = - reinterpret_cast(hCommand) - ->handle; - // forward to device-platform result = pfnUpdateSignalEventExp(hCommand, phSignalEvent); if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *phSignalEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phSignalEvent, - dditable)); - } catch (std::bad_alloc &) { - result = 
UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -8553,30 +6114,16 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferUpdateWaitEventsExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommand) - ->dditable; - auto pfnUpdateWaitEventsExp = - dditable->ur.CommandBufferExp.pfnUpdateWaitEventsExp; + auto *dditable = *reinterpret_cast(hCommand); + + auto *pfnUpdateWaitEventsExp = + dditable->CommandBufferExp.pfnUpdateWaitEventsExp; if (nullptr == pfnUpdateWaitEventsExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommand = - reinterpret_cast(hCommand) - ->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnUpdateWaitEventsExp(hCommand, numEventsInWaitList, - phEventWaitListLocal.data()); + result = + pfnUpdateWaitEventsExp(hCommand, numEventsInWaitList, phEventWaitList); return result; } @@ -8599,19 +6146,12 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferGetInfoExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommandBuffer) - ->dditable; - auto pfnGetInfoExp = dditable->ur.CommandBufferExp.pfnGetInfoExp; + auto *dditable = *reinterpret_cast(hCommandBuffer); + + auto *pfnGetInfoExp = dditable->CommandBufferExp.pfnGetInfoExp; if (nullptr == pfnGetInfoExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommandBuffer = - reinterpret_cast(hCommandBuffer) - ->handle; - // forward to device-platform result = pfnGetInfoExp(hCommandBuffer, propName, propSize, pPropValue, pPropSizeRet); @@ -8637,20 +6177,12 @@ __urdlllocal ur_result_t UR_APICALL 
urCommandBufferCommandGetInfoExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(hCommand) - ->dditable; - auto pfnCommandGetInfoExp = - dditable->ur.CommandBufferExp.pfnCommandGetInfoExp; + auto *dditable = *reinterpret_cast(hCommand); + + auto *pfnCommandGetInfoExp = dditable->CommandBufferExp.pfnCommandGetInfoExp; if (nullptr == pfnCommandGetInfoExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hCommand = - reinterpret_cast(hCommand) - ->handle; - // forward to device-platform result = pfnCommandGetInfoExp(hCommand, propName, propSize, pPropValue, pPropSizeRet); @@ -8695,44 +6227,22 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnCooperativeKernelLaunchExp = - dditable->ur.EnqueueExp.pfnCooperativeKernelLaunchExp; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnCooperativeKernelLaunchExp = + dditable->EnqueueExp.pfnCooperativeKernelLaunchExp; if (nullptr == pfnCooperativeKernelLaunchExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnCooperativeKernelLaunchExp(hQueue, hKernel, workDim, - pGlobalWorkOffset, pGlobalWorkSize, - pLocalWorkSize, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + result = pfnCooperativeKernelLaunchExp( + hQueue, hKernel, workDim, pGlobalWorkOffset, 
pGlobalWorkSize, + pLocalWorkSize, numEventsInWaitList, phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -8760,19 +6270,13 @@ __urdlllocal ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hKernel)->dditable; - auto pfnSuggestMaxCooperativeGroupCountExp = - dditable->ur.KernelExp.pfnSuggestMaxCooperativeGroupCountExp; + auto *dditable = *reinterpret_cast(hKernel); + + auto *pfnSuggestMaxCooperativeGroupCountExp = + dditable->KernelExp.pfnSuggestMaxCooperativeGroupCountExp; if (nullptr == pfnSuggestMaxCooperativeGroupCountExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnSuggestMaxCooperativeGroupCountExp( hKernel, hDevice, workDim, pLocalWorkSize, dynamicSharedMemorySize, @@ -8812,38 +6316,21 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnTimestampRecordingExp = - dditable->ur.EnqueueExp.pfnTimestampRecordingExp; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnTimestampRecordingExp = + dditable->EnqueueExp.pfnTimestampRecordingExp; if (nullptr == 
pfnTimestampRecordingExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnTimestampRecordingExp(hQueue, blocking, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -8890,44 +6377,23 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnKernelLaunchCustomExp = - dditable->ur.EnqueueExp.pfnKernelLaunchCustomExp; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnKernelLaunchCustomExp = + dditable->EnqueueExp.pfnKernelLaunchCustomExp; if (nullptr == pfnKernelLaunchCustomExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handle to platform handle - hKernel = reinterpret_cast(hKernel)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // 
forward to device-platform result = pfnKernelLaunchCustomExp( hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numPropsInLaunchPropList, launchPropList, - numEventsInWaitList, phEventWaitListLocal.data(), phEvent); + numEventsInWaitList, phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -8947,23 +6413,14 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuildExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hProgram)->dditable; - auto pfnBuildExp = dditable->ur.ProgramExp.pfnBuildExp; + auto *dditable = *reinterpret_cast(hProgram); + + auto *pfnBuildExp = dditable->ProgramExp.pfnBuildExp; if (nullptr == pfnBuildExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - - // convert loader handles to platform handles - auto phDevicesLocal = std::vector(numDevices); - for (size_t i = 0; i < numDevices; ++i) - phDevicesLocal[i] = - reinterpret_cast(phDevices[i])->handle; - // forward to device-platform - result = pfnBuildExp(hProgram, numDevices, phDevicesLocal.data(), pOptions); + result = pfnBuildExp(hProgram, numDevices, phDevices, pOptions); return result; } @@ -8983,23 +6440,14 @@ __urdlllocal ur_result_t UR_APICALL urProgramCompileExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hProgram)->dditable; - auto pfnCompileExp = 
dditable->ur.ProgramExp.pfnCompileExp; + auto *dditable = *reinterpret_cast(hProgram); + + auto *pfnCompileExp = dditable->ProgramExp.pfnCompileExp; if (nullptr == pfnCompileExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hProgram = reinterpret_cast(hProgram)->handle; - - // convert loader handles to platform handles - auto phDevicesLocal = std::vector(numDevices); - for (size_t i = 0; i < numDevices; ++i) - phDevicesLocal[i] = - reinterpret_cast(phDevices[i])->handle; - // forward to device-platform - result = pfnCompileExp(hProgram, numDevices, phDevicesLocal.data(), pOptions); + result = pfnCompileExp(hProgram, numDevices, phDevices, pOptions); return result; } @@ -9028,40 +6476,18 @@ __urdlllocal ur_result_t UR_APICALL urProgramLinkExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnLinkExp = dditable->ur.ProgramExp.pfnLinkExp; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnLinkExp = dditable->ProgramExp.pfnLinkExp; if (nullptr == pfnLinkExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - - // convert loader handles to platform handles - auto phDevicesLocal = std::vector(numDevices); - for (size_t i = 0; i < numDevices; ++i) - phDevicesLocal[i] = - reinterpret_cast(phDevices[i])->handle; - - // convert loader handles to platform handles - auto phProgramsLocal = std::vector(count); - for (size_t i = 0; i < count; ++i) - phProgramsLocal[i] = - reinterpret_cast(phPrograms[i])->handle; - // forward to device-platform - result = pfnLinkExp(hContext, numDevices, phDevicesLocal.data(), count, - phProgramsLocal.data(), pOptions, phProgram); + result = pfnLinkExp(hContext, numDevices, phDevices, count, phPrograms, + pOptions, phProgram); - try { - // convert platform handle to loader handle - if (nullptr != phProgram) 
- *phProgram = reinterpret_cast( - context->factories.ur_program_factory.getInstance(*phProgram, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } + if (UR_RESULT_SUCCESS != result) + return result; return result; } @@ -9079,15 +6505,12 @@ __urdlllocal ur_result_t UR_APICALL urUSMImportExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnImportExp = dditable->ur.USMExp.pfnImportExp; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnImportExp = dditable->USMExp.pfnImportExp; if (nullptr == pfnImportExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = pfnImportExp(hContext, pMem, size); @@ -9105,15 +6528,12 @@ __urdlllocal ur_result_t UR_APICALL urUSMReleaseExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hContext)->dditable; - auto pfnReleaseExp = dditable->ur.USMExp.pfnReleaseExp; + auto *dditable = *reinterpret_cast(hContext); + + auto *pfnReleaseExp = dditable->USMExp.pfnReleaseExp; if (nullptr == pfnReleaseExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hContext = reinterpret_cast(hContext)->handle; - // forward to device-platform result = pfnReleaseExp(hContext, pMem); @@ -9131,19 +6551,12 @@ __urdlllocal ur_result_t UR_APICALL urUsmP2PEnablePeerAccessExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(commandDevice)->dditable; - auto pfnEnablePeerAccessExp = dditable->ur.UsmP2PExp.pfnEnablePeerAccessExp; + auto *dditable = *reinterpret_cast(commandDevice); + + auto *pfnEnablePeerAccessExp = dditable->UsmP2PExp.pfnEnablePeerAccessExp; if (nullptr == 
pfnEnablePeerAccessExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - commandDevice = reinterpret_cast(commandDevice)->handle; - - // convert loader handle to platform handle - peerDevice = reinterpret_cast(peerDevice)->handle; - // forward to device-platform result = pfnEnablePeerAccessExp(commandDevice, peerDevice); @@ -9161,19 +6574,12 @@ __urdlllocal ur_result_t UR_APICALL urUsmP2PDisablePeerAccessExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(commandDevice)->dditable; - auto pfnDisablePeerAccessExp = dditable->ur.UsmP2PExp.pfnDisablePeerAccessExp; + auto *dditable = *reinterpret_cast(commandDevice); + + auto *pfnDisablePeerAccessExp = dditable->UsmP2PExp.pfnDisablePeerAccessExp; if (nullptr == pfnDisablePeerAccessExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - commandDevice = reinterpret_cast(commandDevice)->handle; - - // convert loader handle to platform handle - peerDevice = reinterpret_cast(peerDevice)->handle; - // forward to device-platform result = pfnDisablePeerAccessExp(commandDevice, peerDevice); @@ -9205,19 +6611,12 @@ __urdlllocal ur_result_t UR_APICALL urUsmP2PPeerAccessGetInfoExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = - reinterpret_cast(commandDevice)->dditable; - auto pfnPeerAccessGetInfoExp = dditable->ur.UsmP2PExp.pfnPeerAccessGetInfoExp; + auto *dditable = *reinterpret_cast(commandDevice); + + auto *pfnPeerAccessGetInfoExp = dditable->UsmP2PExp.pfnPeerAccessGetInfoExp; if (nullptr == pfnPeerAccessGetInfoExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - commandDevice = reinterpret_cast(commandDevice)->handle; - - // convert loader handle to platform handle - peerDevice = reinterpret_cast(peerDevice)->handle; - // forward to device-platform result = 
pfnPeerAccessGetInfoExp(commandDevice, peerDevice, propName, propSize, pPropValue, pPropSizeRet); @@ -9248,39 +6647,21 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrierExt( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnEventsWaitWithBarrierExt = - dditable->ur.Enqueue.pfnEventsWaitWithBarrierExt; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnEventsWaitWithBarrierExt = + dditable->Enqueue.pfnEventsWaitWithBarrierExt; if (nullptr == pfnEventsWaitWithBarrierExt) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform result = pfnEventsWaitWithBarrierExt(hQueue, pProperties, numEventsInWaitList, - phEventWaitListLocal.data(), phEvent); + phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -9320,45 +6701,21 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hQueue)->dditable; - auto pfnNativeCommandExp = dditable->ur.EnqueueExp.pfnNativeCommandExp; + auto *dditable = *reinterpret_cast(hQueue); + + auto *pfnNativeCommandExp = dditable->EnqueueExp.pfnNativeCommandExp; if (nullptr == pfnNativeCommandExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hQueue = reinterpret_cast(hQueue)->handle; - - // convert loader handles to platform handles - auto phMemListLocal = std::vector(numMemsInMemList); - for (size_t i = 0; i < numMemsInMemList; ++i) - phMemListLocal[i] = - reinterpret_cast(phMemList[i])->handle; - - // convert loader handles to platform handles - auto phEventWaitListLocal = - std::vector(numEventsInWaitList); - for (size_t i = 0; i < numEventsInWaitList; ++i) - phEventWaitListLocal[i] = - reinterpret_cast(phEventWaitList[i])->handle; - // forward to device-platform - result = pfnNativeCommandExp( - hQueue, pfnNativeEnqueue, data, numMemsInMemList, phMemListLocal.data(), - pProperties, numEventsInWaitList, phEventWaitListLocal.data(), phEvent); + result = pfnNativeCommandExp(hQueue, pfnNativeEnqueue, data, numMemsInMemList, + phMemList, pProperties, numEventsInWaitList, + phEventWaitList, phEvent); // In the event of ERROR_ADAPTER_SPECIFIC we should still attempt to wrap any // output handles below. 
if (UR_RESULT_SUCCESS != result && UR_RESULT_ERROR_ADAPTER_SPECIFIC != result) return result; - try { - // convert platform handle to loader handle - if (nullptr != phEvent) - *phEvent = reinterpret_cast( - context->factories.ur_event_factory.getInstance(*phEvent, dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } return result; } @@ -9406,15 +6763,12 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hDevice)->dditable; - auto pfnEncodeIm2ColExp = dditable->ur.TensorMapExp.pfnEncodeIm2ColExp; + auto *dditable = *reinterpret_cast(hDevice); + + auto *pfnEncodeIm2ColExp = dditable->TensorMapExp.pfnEncodeIm2ColExp; if (nullptr == pfnEncodeIm2ColExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnEncodeIm2ColExp(hDevice, TensorMapType, TensorRank, GlobalAddress, GlobalDim, GlobalStrides, PixelBoxLowerCorner, @@ -9425,15 +6779,6 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *hTensorMap = reinterpret_cast( - context->factories.ur_exp_tensor_map_factory.getInstance(*hTensorMap, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -9476,15 +6821,12 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( [[maybe_unused]] auto context = getContext(); - // extract platform's function pointer table - auto dditable = reinterpret_cast(hDevice)->dditable; - auto pfnEncodeTiledExp = dditable->ur.TensorMapExp.pfnEncodeTiledExp; + auto *dditable = *reinterpret_cast(hDevice); + + auto *pfnEncodeTiledExp = dditable->TensorMapExp.pfnEncodeTiledExp; if (nullptr == 
pfnEncodeTiledExp) return UR_RESULT_ERROR_UNINITIALIZED; - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - // forward to device-platform result = pfnEncodeTiledExp(hDevice, TensorMapType, TensorRank, GlobalAddress, @@ -9494,15 +6836,6 @@ __urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( if (UR_RESULT_SUCCESS != result) return result; - try { - // convert platform handle to loader handle - *hTensorMap = reinterpret_cast( - context->factories.ur_exp_tensor_map_factory.getInstance(*hTensorMap, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - return result; } @@ -9547,7 +6880,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( "urGetGlobalProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Global); + platform.initStatus = getTable(version, &platform.dditable.Global); } if (UR_RESULT_SUCCESS == result) { @@ -9561,8 +6894,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetGlobalProcAddrTable( pDdiTable->pfnAdapterGetInfo = ur_loader::urAdapterGetInfo; } else { // return pointers directly to platform's DDIs - *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.Global; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.Global; } } @@ -9605,7 +6937,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( if (!getTable) continue; platform.initStatus = - getTable(version, &platform.dditable.ur.BindlessImagesExp); + getTable(version, &platform.dditable.BindlessImagesExp); } if (UR_RESULT_SUCCESS == result) { @@ -9647,9 +6979,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetBindlessImagesExpProcAddrTable( ur_loader::urBindlessImagesSignalExternalSemaphoreExp; } else { // return pointers directly to platform's DDIs - *pDdiTable = ur_loader::getContext() - ->platforms.front() - .dditable.ur.BindlessImagesExp; + *pDdiTable = + 
ur_loader::getContext()->platforms.front().dditable.BindlessImagesExp; } } @@ -9692,7 +7023,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( if (!getTable) continue; platform.initStatus = - getTable(version, &platform.dditable.ur.CommandBufferExp); + getTable(version, &platform.dditable.CommandBufferExp); } if (UR_RESULT_SUCCESS == result) { @@ -9743,9 +7074,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( ur_loader::urCommandBufferCommandGetInfoExp; } else { // return pointers directly to platform's DDIs - *pDdiTable = ur_loader::getContext() - ->platforms.front() - .dditable.ur.CommandBufferExp; + *pDdiTable = + ur_loader::getContext()->platforms.front().dditable.CommandBufferExp; } } @@ -9787,7 +7117,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( "urGetContextProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Context); + platform.initStatus = getTable(version, &platform.dditable.Context); } if (UR_RESULT_SUCCESS == result) { @@ -9804,8 +7134,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetContextProcAddrTable( pDdiTable->pfnSetExtendedDeleter = ur_loader::urContextSetExtendedDeleter; } else { // return pointers directly to platform's DDIs - *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.Context; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.Context; } } @@ -9847,7 +7176,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( "urGetEnqueueProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Enqueue); + platform.initStatus = getTable(version, &platform.dditable.Enqueue); } if (UR_RESULT_SUCCESS == result) { @@ -9886,8 +7215,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( ur_loader::urEnqueueEventsWaitWithBarrierExt; } else { // return pointers directly to platform's DDIs - *pDdiTable = - 
ur_loader::getContext()->platforms.front().dditable.ur.Enqueue; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.Enqueue; } } @@ -9929,7 +7257,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( "urGetEnqueueExpProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.EnqueueExp); + platform.initStatus = getTable(version, &platform.dditable.EnqueueExp); } if (UR_RESULT_SUCCESS == result) { @@ -9946,7 +7274,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueExpProcAddrTable( } else { // return pointers directly to platform's DDIs *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.EnqueueExp; + ur_loader::getContext()->platforms.front().dditable.EnqueueExp; } } @@ -9988,7 +7316,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( "urGetEventProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Event); + platform.initStatus = getTable(version, &platform.dditable.Event); } if (UR_RESULT_SUCCESS == result) { @@ -10006,7 +7334,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( pDdiTable->pfnSetCallback = ur_loader::urEventSetCallback; } else { // return pointers directly to platform's DDIs - *pDdiTable = ur_loader::getContext()->platforms.front().dditable.ur.Event; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.Event; } } @@ -10048,7 +7376,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( "urGetKernelProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Kernel); + platform.initStatus = getTable(version, &platform.dditable.Kernel); } if (UR_RESULT_SUCCESS == result) { @@ -10076,8 +7404,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable( ur_loader::urKernelSetSpecializationConstants; } else { // return pointers directly to platform's DDIs - *pDdiTable = - 
ur_loader::getContext()->platforms.front().dditable.ur.Kernel; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.Kernel; } } @@ -10119,7 +7446,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( "urGetKernelExpProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.KernelExp); + platform.initStatus = getTable(version, &platform.dditable.KernelExp); } if (UR_RESULT_SUCCESS == result) { @@ -10131,7 +7458,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( } else { // return pointers directly to platform's DDIs *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.KernelExp; + ur_loader::getContext()->platforms.front().dditable.KernelExp; } } @@ -10173,7 +7500,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetMemProcAddrTable( "urGetMemProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Mem); + platform.initStatus = getTable(version, &platform.dditable.Mem); } if (UR_RESULT_SUCCESS == result) { @@ -10194,7 +7521,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetMemProcAddrTable( pDdiTable->pfnImageGetInfo = ur_loader::urMemImageGetInfo; } else { // return pointers directly to platform's DDIs - *pDdiTable = ur_loader::getContext()->platforms.front().dditable.ur.Mem; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.Mem; } } @@ -10236,7 +7563,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( "urGetPhysicalMemProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.PhysicalMem); + platform.initStatus = getTable(version, &platform.dditable.PhysicalMem); } if (UR_RESULT_SUCCESS == result) { @@ -10250,7 +7577,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPhysicalMemProcAddrTable( } else { // return pointers directly to platform's DDIs *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.PhysicalMem; + 
ur_loader::getContext()->platforms.front().dditable.PhysicalMem; } } @@ -10292,7 +7619,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( "urGetPlatformProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Platform); + platform.initStatus = getTable(version, &platform.dditable.Platform); } if (UR_RESULT_SUCCESS == result) { @@ -10308,8 +7635,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetPlatformProcAddrTable( pDdiTable->pfnGetBackendOption = ur_loader::urPlatformGetBackendOption; } else { // return pointers directly to platform's DDIs - *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.Platform; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.Platform; } } @@ -10351,7 +7677,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( "urGetProgramProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Program); + platform.initStatus = getTable(version, &platform.dditable.Program); } if (UR_RESULT_SUCCESS == result) { @@ -10377,8 +7703,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramProcAddrTable( ur_loader::urProgramCreateWithNativeHandle; } else { // return pointers directly to platform's DDIs - *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.Program; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.Program; } } @@ -10420,7 +7745,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( "urGetProgramExpProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.ProgramExp); + platform.initStatus = getTable(version, &platform.dditable.ProgramExp); } if (UR_RESULT_SUCCESS == result) { @@ -10433,7 +7758,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( } else { // return pointers directly to platform's DDIs *pDdiTable = - 
ur_loader::getContext()->platforms.front().dditable.ur.ProgramExp; + ur_loader::getContext()->platforms.front().dditable.ProgramExp; } } @@ -10475,7 +7800,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( "urGetQueueProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Queue); + platform.initStatus = getTable(version, &platform.dditable.Queue); } if (UR_RESULT_SUCCESS == result) { @@ -10493,7 +7818,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetQueueProcAddrTable( pDdiTable->pfnFlush = ur_loader::urQueueFlush; } else { // return pointers directly to platform's DDIs - *pDdiTable = ur_loader::getContext()->platforms.front().dditable.ur.Queue; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.Queue; } } @@ -10535,7 +7860,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( "urGetSamplerProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Sampler); + platform.initStatus = getTable(version, &platform.dditable.Sampler); } if (UR_RESULT_SUCCESS == result) { @@ -10551,8 +7876,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( ur_loader::urSamplerCreateWithNativeHandle; } else { // return pointers directly to platform's DDIs - *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.Sampler; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.Sampler; } } @@ -10594,7 +7918,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( "urGetTensorMapExpProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.TensorMapExp); + platform.initStatus = getTable(version, &platform.dditable.TensorMapExp); } if (UR_RESULT_SUCCESS == result) { @@ -10606,7 +7930,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( } else { // return pointers directly to platform's DDIs *pDdiTable = - 
ur_loader::getContext()->platforms.front().dditable.ur.TensorMapExp; + ur_loader::getContext()->platforms.front().dditable.TensorMapExp; } } @@ -10648,7 +7972,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMProcAddrTable( "urGetUSMProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.USM); + platform.initStatus = getTable(version, &platform.dditable.USM); } if (UR_RESULT_SUCCESS == result) { @@ -10666,7 +7990,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMProcAddrTable( pDdiTable->pfnPoolGetInfo = ur_loader::urUSMPoolGetInfo; } else { // return pointers directly to platform's DDIs - *pDdiTable = ur_loader::getContext()->platforms.front().dditable.ur.USM; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.USM; } } @@ -10708,7 +8032,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( "urGetUSMExpProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.USMExp); + platform.initStatus = getTable(version, &platform.dditable.USMExp); } if (UR_RESULT_SUCCESS == result) { @@ -10720,8 +8044,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUSMExpProcAddrTable( pDdiTable->pfnReleaseExp = ur_loader::urUSMReleaseExp; } else { // return pointers directly to platform's DDIs - *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.USMExp; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.USMExp; } } @@ -10763,7 +8086,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( "urGetUsmP2PExpProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.UsmP2PExp); + platform.initStatus = getTable(version, &platform.dditable.UsmP2PExp); } if (UR_RESULT_SUCCESS == result) { @@ -10779,7 +8102,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( } else { // return pointers directly to platform's DDIs *pDdiTable = - 
ur_loader::getContext()->platforms.front().dditable.ur.UsmP2PExp; + ur_loader::getContext()->platforms.front().dditable.UsmP2PExp; } } @@ -10821,7 +8144,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( "urGetVirtualMemProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.VirtualMem); + platform.initStatus = getTable(version, &platform.dditable.VirtualMem); } if (UR_RESULT_SUCCESS == result) { @@ -10839,7 +8162,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( } else { // return pointers directly to platform's DDIs *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.VirtualMem; + ur_loader::getContext()->platforms.front().dditable.VirtualMem; } } @@ -10881,7 +8204,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( "urGetDeviceProcAddrTable")); if (!getTable) continue; - platform.initStatus = getTable(version, &platform.dditable.ur.Device); + platform.initStatus = getTable(version, &platform.dditable.Device); } if (UR_RESULT_SUCCESS == result) { @@ -10901,8 +8224,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetDeviceProcAddrTable( ur_loader::urDeviceGetGlobalTimestamps; } else { // return pointers directly to platform's DDIs - *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.Device; + *pDdiTable = ur_loader::getContext()->platforms.front().dditable.Device; } } diff --git a/source/loader/ur_ldrddi.hpp b/source/loader/ur_ldrddi.hpp deleted file mode 100644 index e1a5e69c42..0000000000 --- a/source/loader/ur_ldrddi.hpp +++ /dev/null @@ -1,118 +0,0 @@ -/* - * - * Copyright (C) 2022-2023 Intel Corporation - * - * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM - * Exceptions. 
- * See LICENSE.TXT - * - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - * @file ur_ldrddi.hpp - * - */ -#ifndef UR_LOADER_LDRDDI_H -#define UR_LOADER_LDRDDI_H 1 - -#include "ur_object.hpp" -#include "ur_singleton.hpp" - -namespace ur_loader { -/////////////////////////////////////////////////////////////////////////////// - -using ur_adapter_object_t = object_t; -using ur_adapter_factory_t = - singleton_factory_t; - -using ur_platform_object_t = object_t; -using ur_platform_factory_t = - singleton_factory_t; - -using ur_device_object_t = object_t; -using ur_device_factory_t = - singleton_factory_t; - -using ur_context_object_t = object_t; -using ur_context_factory_t = - singleton_factory_t; - -using ur_event_object_t = object_t; -using ur_event_factory_t = - singleton_factory_t; - -using ur_program_object_t = object_t; -using ur_program_factory_t = - singleton_factory_t; - -using ur_kernel_object_t = object_t; -using ur_kernel_factory_t = - singleton_factory_t; - -using ur_queue_object_t = object_t; -using ur_queue_factory_t = - singleton_factory_t; - -using ur_sampler_object_t = object_t; -using ur_sampler_factory_t = - singleton_factory_t; - -using ur_mem_object_t = object_t; -using ur_mem_factory_t = singleton_factory_t; - -using ur_physical_mem_object_t = object_t; -using ur_physical_mem_factory_t = - singleton_factory_t; - -using ur_usm_pool_object_t = object_t; -using ur_usm_pool_factory_t = - singleton_factory_t; - -using ur_exp_external_mem_object_t = object_t; -using ur_exp_external_mem_factory_t = - singleton_factory_t; - -using ur_exp_external_semaphore_object_t = - object_t; -using ur_exp_external_semaphore_factory_t = - singleton_factory_t; - -using ur_exp_command_buffer_object_t = object_t; -using ur_exp_command_buffer_factory_t = - singleton_factory_t; - -using ur_exp_command_buffer_command_object_t = - object_t; -using ur_exp_command_buffer_command_factory_t = - singleton_factory_t; - -using ur_exp_tensor_map_object_t = object_t; 
-using ur_exp_tensor_map_factory_t = - singleton_factory_t; - -struct handle_factories { - ur_adapter_factory_t ur_adapter_factory; - ur_platform_factory_t ur_platform_factory; - ur_device_factory_t ur_device_factory; - ur_context_factory_t ur_context_factory; - ur_event_factory_t ur_event_factory; - ur_program_factory_t ur_program_factory; - ur_kernel_factory_t ur_kernel_factory; - ur_queue_factory_t ur_queue_factory; - ur_sampler_factory_t ur_sampler_factory; - ur_mem_factory_t ur_mem_factory; - ur_physical_mem_factory_t ur_physical_mem_factory; - ur_usm_pool_factory_t ur_usm_pool_factory; - ur_exp_external_mem_factory_t ur_exp_external_mem_factory; - ur_exp_external_semaphore_factory_t ur_exp_external_semaphore_factory; - ur_exp_command_buffer_factory_t ur_exp_command_buffer_factory; - ur_exp_command_buffer_command_factory_t ur_exp_command_buffer_command_factory; - ur_exp_tensor_map_factory_t ur_exp_tensor_map_factory; -}; - -} // namespace ur_loader - -#endif /* UR_LOADER_LDRDDI_H */ diff --git a/source/loader/ur_lib.cpp b/source/loader/ur_lib.cpp index ef46895861..a60997489f 100644 --- a/source/loader/ur_lib.cpp +++ b/source/loader/ur_lib.cpp @@ -17,7 +17,6 @@ #ifndef NOMINMAX #define NOMINMAX #include "ur_api.h" -#include "ur_ldrddi.hpp" #endif // !NOMINMAX #include "logger/ur_logger.hpp" diff --git a/source/loader/ur_loader.hpp b/source/loader/ur_loader.hpp index 4ed36264ee..0f700fd2e8 100644 --- a/source/loader/ur_loader.hpp +++ b/source/loader/ur_loader.hpp @@ -14,7 +14,7 @@ #define UR_LOADER_HPP 1 #include "ur_adapter_registry.hpp" -#include "ur_ldrddi.hpp" +#include "ur_ddi.h" #include "ur_lib_loader.hpp" namespace ur_loader { @@ -25,7 +25,7 @@ struct platform_t { std::unique_ptr handle; ur_result_t initStatus = UR_RESULT_SUCCESS; - dditable_t dditable = {}; + ur_dditable_t dditable = {}; }; using platform_vector_t = std::vector; @@ -41,8 +41,6 @@ class context_t : public AtomicSingleton { ur_result_t init(); bool intercept_enabled = false; - - struct 
handle_factories factories; }; context_t *getContext(); diff --git a/source/loader/ur_object.hpp b/source/loader/ur_object.hpp deleted file mode 100644 index fa03adb91a..0000000000 --- a/source/loader/ur_object.hpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * - * Copyright (C) 2022-2023 Intel Corporation - * - * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM - * Exceptions. See LICENSE.TXT - * - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - * @file ur_object.hpp - * - */ - -#ifndef UR_OBJECT_H -#define UR_OBJECT_H 1 - -#include "ur_ddi.h" -#include "ur_util.hpp" - -////////////////////////////////////////////////////////////////////////// -struct dditable_t { - ur_dditable_t ur; -}; - -////////////////////////////////////////////////////////////////////////// -template class object_t { -public: - using handle_t = _handle_t; - - handle_t handle; - dditable_t *dditable; - - object_t() = delete; - - object_t(handle_t _handle, dditable_t *_dditable) - : handle(_handle), dditable(_dditable) {} - - ~object_t() = default; -}; - -#endif /* UR_OBJECT_H */ diff --git a/source/ur/ur.hpp b/source/ur/ur.hpp index 5a23a88ba9..5e8f75e394 100644 --- a/source/ur/ur.hpp +++ b/source/ur/ur.hpp @@ -193,6 +193,12 @@ template struct ZeCache : private T { if (!(condition)) \ return error; +struct ur_dditable_t; +template struct ur_handle_base_t_ { + ur_handle_base_t_() { ddi_table = getddi::value(); }; + ur_dditable_t *ddi_table = nullptr; +}; + // TODO: populate with target agnostic handling of UR platforms struct _ur_platform {}; diff --git a/test/loader/handles/fixtures.hpp b/test/loader/handles/fixtures.hpp index d2eaab13f3..237f99cb78 100644 --- a/test/loader/handles/fixtures.hpp +++ b/test/loader/handles/fixtures.hpp @@ -15,41 +15,9 @@ #define ASSERT_SUCCESS(ACTUAL) ASSERT_EQ(UR_RESULT_SUCCESS, ACTUAL) #endif -ur_result_t replace_urPlatformGet(void *pParams) { - const auto &params = *static_cast(pParams); - - if (*params.ppNumPlatforms) { - 
**params.ppNumPlatforms = 1; - } - - if (*params.pphPlatforms && *params.pNumEntries == 1) { - **params.pphPlatforms = reinterpret_cast(0x1); - } - - return UR_RESULT_SUCCESS; -} - -ur_result_t replace_urDeviceGetInfo(void *pParams) { - const auto &params = *static_cast(pParams); - if (*params.ppropName == UR_DEVICE_INFO_PLATFORM) { - if (*params.ppPropSizeRet) { - **params.ppPropSizeRet = sizeof(ur_platform_handle_t); - } - if (*params.ppPropValue) { - **(reinterpret_cast(params.ppPropValue)) = - reinterpret_cast(0x1); - } - } - return UR_RESULT_SUCCESS; -} - struct LoaderHandleTest : ::testing::Test { void SetUp() override { urLoaderInit(0, nullptr); - mock::getCallbacks().set_replace_callback("urDeviceGetInfo", - &replace_urDeviceGetInfo); - mock::getCallbacks().set_replace_callback("urPlatformGet", - &replace_urPlatformGet); uint32_t nadapters = 0; adapter = nullptr; ASSERT_SUCCESS(urAdapterGet(1, &adapter, &nadapters)); diff --git a/test/loader/handles/urLoaderHandles.cpp b/test/loader/handles/urLoaderHandles.cpp index 4a66ad21b5..82ebb5123f 100644 --- a/test/loader/handles/urLoaderHandles.cpp +++ b/test/loader/handles/urLoaderHandles.cpp @@ -11,7 +11,8 @@ #include TEST_F(LoaderHandleTest, Success) { - ur_platform_handle_t query_platform; + ur_platform_handle_t query_platform = + reinterpret_cast(1234); size_t retsize; ASSERT_SUCCESS(urDeviceGetInfo(device, UR_DEVICE_INFO_PLATFORM, sizeof(intptr_t), &query_platform, &retsize));