Skip to content

Commit

Permalink
Merge branch 'oneapi-src:main' into add-graph-bench
Browse files Browse the repository at this point in the history
  • Loading branch information
mateuszpn authored Jan 15, 2025
2 parents 5c3a7cd + 9e48f54 commit ac80ea5
Show file tree
Hide file tree
Showing 11 changed files with 380 additions and 195 deletions.
3 changes: 3 additions & 0 deletions source/adapters/level_zero/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,9 @@ void zeParseError(ze_result_t ZeError, const char *&ErrorString);
#define ZE_CALL_NOCHECK(ZeName, ZeArgs) \
ZeCall().doCall(ZeName ZeArgs, #ZeName, #ZeArgs, false)

// Variant of ZE_CALL_NOCHECK in which the call-name string handed to
// ZeCall::doCall is supplied explicitly by the caller (callName) instead of
// being produced by stringifying ZeName. The argument list is still
// stringified with #ZeArgs.
#define ZE_CALL_NOCHECK_NAME(ZeName, ZeArgs, callName) \
ZeCall().doCall(ZeName ZeArgs, callName, #ZeArgs, false)

// This wrapper around std::atomic is created to limit operations with reference
// counter and to make allowed operations more transparent in terms of
// thread-safety in the plugin. increment() and load() operations do not need a
Expand Down
36 changes: 25 additions & 11 deletions source/adapters/level_zero/v2/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,23 @@

#include "../common.hpp"
#include "logger/ur_logger.hpp"
// NOTE: these are preprocessor macros, so they are intentionally NOT wrapped in
// a namespace: namespaces do not scope macros, and an unnamed namespace in a
// header would only be an empty, misleading declaration.

// Defines a function template `<name>_wrapped(handle)` that forwards `handle`
// to the Level Zero entry point `name` through ZE_CALL_NOCHECK_NAME, passing
// the stringified `name` as the call name, and returns the ze_result_t.
// The template is emitted in whichever namespace the macro is invoked from.
// NOTE(review): presumably this exists so the call is reported under the real
// API name rather than under a template parameter name -- confirm.
#define DECLARE_DESTROY_FUNCTION(name)                                         \
    template <typename ZeHandleT>                                              \
    ze_result_t name##_wrapped(ZeHandleT handle) {                             \
        return ZE_CALL_NOCHECK_NAME(name, (handle), #name);                    \
    }

// Expands to the ze_handle_wrapper instantiation for `handle` whose destroy
// function is the `<destroy>_wrapped<handle>` helper produced by
// DECLARE_DESTROY_FUNCTION(destroy). Name lookup for both ze_handle_wrapper and
// the helper happens at the point of use.
#define HANDLE_WRAPPER_TYPE(handle, destroy)                                   \
    ze_handle_wrapper<handle, destroy##_wrapped<handle>>

namespace v2 {

// Emit one `<name>_wrapped` function template per Level Zero destroy entry
// point listed here (see DECLARE_DESTROY_FUNCTION); HANDLE_WRAPPER_TYPE refers
// to these helpers by the `<name>_wrapped` spelling.
DECLARE_DESTROY_FUNCTION(zeKernelDestroy);
DECLARE_DESTROY_FUNCTION(zeEventDestroy);
DECLARE_DESTROY_FUNCTION(zeEventPoolDestroy);
DECLARE_DESTROY_FUNCTION(zeContextDestroy);
DECLARE_DESTROY_FUNCTION(zeCommandListDestroy);
namespace raii {

template <typename ZeHandleT, ze_result_t (*destroy)(ZeHandleT)>
Expand Down Expand Up @@ -65,7 +79,7 @@ struct ze_handle_wrapper {
}

if (ownZeHandle) {
auto zeResult = ZE_CALL_NOCHECK(destroy, (handle));
auto zeResult = destroy(handle);
// Gracefully handle the case that L0 was already unloaded.
if (zeResult && zeResult != ZE_RESULT_ERROR_UNINITIALIZED)
throw ze2urResult(zeResult);
Expand All @@ -89,20 +103,20 @@ struct ze_handle_wrapper {
bool ownZeHandle;
};

using ze_kernel_handle_t =
ze_handle_wrapper<::ze_kernel_handle_t, zeKernelDestroy>;
using ze_kernel_handle_t = HANDLE_WRAPPER_TYPE(::ze_kernel_handle_t,
zeKernelDestroy);

using ze_event_handle_t =
ze_handle_wrapper<::ze_event_handle_t, zeEventDestroy>;
using ze_event_handle_t = HANDLE_WRAPPER_TYPE(::ze_event_handle_t,
zeEventDestroy);

using ze_event_pool_handle_t =
ze_handle_wrapper<::ze_event_pool_handle_t, zeEventPoolDestroy>;
using ze_event_pool_handle_t = HANDLE_WRAPPER_TYPE(::ze_event_pool_handle_t,
zeEventPoolDestroy);

using ze_context_handle_t =
ze_handle_wrapper<::ze_context_handle_t, zeContextDestroy>;
using ze_context_handle_t = HANDLE_WRAPPER_TYPE(::ze_context_handle_t,
zeContextDestroy);

using ze_command_list_handle_t =
ze_handle_wrapper<::ze_command_list_handle_t, zeCommandListDestroy>;
using ze_command_list_handle_t = HANDLE_WRAPPER_TYPE(::ze_command_list_handle_t,
zeCommandListDestroy);

} // namespace raii
} // namespace v2
2 changes: 1 addition & 1 deletion source/adapters/level_zero/v2/queue_immediate_in_order.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ ur_command_list_handler_t::ur_command_list_handler_t(
: commandList(hZeCommandList,
[ownZeHandle](ze_command_list_handle_t hZeCommandList) {
if (ownZeHandle) {
zeCommandListDestroy(hZeCommandList);
ZE_CALL_NOCHECK(zeCommandListDestroy, (hZeCommandList));
}
}) {}

Expand Down
6 changes: 3 additions & 3 deletions source/loader/layers/sanitizer/msan/msan_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
USMDesc.align = getAlignment();
ur_usm_pool_handle_t Pool{};
URes = getMsanInterceptor()->allocateMemory(
Context, Device, &USMDesc, Pool, Size,
Context, Device, &USMDesc, Pool, Size, AllocType::DEVICE_USM,
ur_cast<void **>(&Allocation));
if (URes != UR_RESULT_SUCCESS) {
getContext()->logger.error(
Expand Down Expand Up @@ -181,8 +181,8 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) {
ur_usm_desc_t USMDesc{};
USMDesc.align = getAlignment();
ur_usm_pool_handle_t Pool{};
URes = getMsanInterceptor()->allocateMemory(
Context, nullptr, &USMDesc, Pool, Size,
URes = getContext()->urDdiTable.USM.pfnHostAlloc(
Context, &USMDesc, Pool, Size,
ur_cast<void **>(&HostAllocation));
if (URes != UR_RESULT_SUCCESS) {
getContext()->logger.error("Failed to allocate {} bytes host "
Expand Down
45 changes: 42 additions & 3 deletions source/loader/layers/sanitizer/msan/msan_ddi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,50 @@ ur_result_t urUSMDeviceAlloc(
) {
getContext()->logger.debug("==== urUSMDeviceAlloc");

return getMsanInterceptor()->allocateMemory(hContext, hDevice, pUSMDesc,
pool, size, ppMem);
return getMsanInterceptor()->allocateMemory(
hContext, hDevice, pUSMDesc, pool, size, AllocType::DEVICE_USM, ppMem);
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Intercept function for urUSMHostAlloc
///
/// Emits a debug log line and hands the request to the MSan interceptor as a
/// HOST_USM allocation. No device handle is passed for this allocation type.
ur_result_t UR_APICALL urUSMHostAlloc(
    ur_context_handle_t hContext,  ///< [in] handle of the context object
    const ur_usm_desc_t *pUSMDesc, ///< [in][optional] USM memory allocation
                                   ///< descriptor
    ur_usm_pool_handle_t pool,     ///< [in][optional] Pointer to a pool created
                                   ///< using urUSMPoolCreate
    size_t size,  ///< [in] size in bytes of the USM memory object to be
                  ///< allocated
    void **ppMem  ///< [out] pointer to USM host memory object
) {
    getContext()->logger.debug("==== urUSMHostAlloc");

    auto &&Interceptor = getMsanInterceptor();
    const ur_result_t Status = Interceptor->allocateMemory(
        hContext, /*Device=*/nullptr, pUSMDesc, pool, size,
        AllocType::HOST_USM, ppMem);
    return Status;
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Intercept function for urUSMSharedAlloc
///
/// Emits a debug log line and hands the request, including the device handle,
/// to the MSan interceptor as a SHARED_USM allocation.
ur_result_t UR_APICALL urUSMSharedAlloc(
    ur_context_handle_t hContext,  ///< [in] handle of the context object
    ur_device_handle_t hDevice,    ///< [in] handle of the device object
    const ur_usm_desc_t *pUSMDesc, ///< [in][optional] Pointer to USM memory
                                   ///< allocation descriptor.
    ur_usm_pool_handle_t pool,     ///< [in][optional] Pointer to a pool created
                                   ///< using urUSMPoolCreate
    size_t size,  ///< [in] size in bytes of the USM memory object to be
                  ///< allocated
    void **ppMem  ///< [out] pointer to USM shared memory object
) {
    getContext()->logger.debug("==== urUSMSharedAlloc");

    auto &&Interceptor = getMsanInterceptor();
    const ur_result_t Status = Interceptor->allocateMemory(
        hContext, hDevice, pUSMDesc, pool, size, AllocType::SHARED_USM,
        ppMem);
    return Status;
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Intercept function for urUSMFree
__urdlllocal ur_result_t UR_APICALL urUSMFree(
ur_result_t UR_APICALL urUSMFree(
ur_context_handle_t hContext, ///< [in] handle of the context object
void *pMem ///< [in] pointer to USM memory object
) {
Expand Down Expand Up @@ -1748,6 +1785,8 @@ ur_result_t urGetUSMProcAddrTable(
ur_result_t result = UR_RESULT_SUCCESS;

pDdiTable->pfnDeviceAlloc = ur_sanitizer_layer::msan::urUSMDeviceAlloc;
pDdiTable->pfnHostAlloc = ur_sanitizer_layer::msan::urUSMHostAlloc;
pDdiTable->pfnSharedAlloc = ur_sanitizer_layer::msan::urUSMSharedAlloc;
pDdiTable->pfnFree = ur_sanitizer_layer::msan::urUSMFree;

return result;
Expand Down
91 changes: 87 additions & 4 deletions source/loader/layers/sanitizer/msan/msan_interceptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,18 +46,36 @@ ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context,
ur_device_handle_t Device,
const ur_usm_desc_t *Properties,
ur_usm_pool_handle_t Pool,
size_t Size, void **ResultPtr) {
size_t Size, AllocType Type,
void **ResultPtr) {

auto ContextInfo = getContextInfo(Context);
std::shared_ptr<DeviceInfo> DeviceInfo = getDeviceInfo(Device);
std::shared_ptr<DeviceInfo> DeviceInfo =
Device ? getDeviceInfo(Device) : nullptr;

void *Allocated = nullptr;

UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc(
Context, Device, Properties, Pool, Size, &Allocated));
if (Type == AllocType::DEVICE_USM) {
UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc(
Context, Device, Properties, Pool, Size, &Allocated));
} else if (Type == AllocType::HOST_USM) {
UR_CALL(getContext()->urDdiTable.USM.pfnHostAlloc(
Context, Properties, Pool, Size, &Allocated));
} else if (Type == AllocType::SHARED_USM) {
UR_CALL(getContext()->urDdiTable.USM.pfnSharedAlloc(
Context, Device, Properties, Pool, Size, &Allocated));
}

*ResultPtr = Allocated;

ContextInfo->MaxAllocatedSize =
std::max(ContextInfo->MaxAllocatedSize, Size);

// For host/shared usm, we only record the alloc size.
if (Type != AllocType::DEVICE_USM) {
return UR_RESULT_SUCCESS;
}

auto AI =
std::make_shared<MsanAllocInfo>(MsanAllocInfo{(uptr)Allocated,
Size,
Expand Down Expand Up @@ -145,6 +163,12 @@ ur_result_t MsanInterceptor::registerProgram(ur_program_handle_t Program) {
return Result;
}

getContext()->logger.info("registerDeviceGlobals");
Result = registerDeviceGlobals(Program);
if (Result != UR_RESULT_SUCCESS) {
return Result;
}

return Result;
}

Expand Down Expand Up @@ -213,6 +237,56 @@ ur_result_t MsanInterceptor::registerSpirKernels(ur_program_handle_t Program) {
return UR_RESULT_SUCCESS;
}

/// Registers the device globals of \p Program with the sanitizer.
///
/// For every device of the program, looks up the global variable named by
/// kSPIR_MsanDeviceGlobalMetadata, copies it to the host as an array of
/// DeviceGlobalInfo, and for each entry enqueues a shadow write with value 0
/// over [Addr, Addr + Size). The context's MaxAllocatedSize is raised to cover
/// the largest device global seen.
///
/// Devices for which the metadata variable cannot be resolved, or for which it
/// holds no complete entry, are skipped.
///
/// @param Program program whose device globals are registered; must already be
///                known to the interceptor (asserted in debug builds).
/// @return UR_RESULT_SUCCESS on success, otherwise the error from the metadata
///         copy or from the shadow update.
ur_result_t
MsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) {
    std::vector<ur_device_handle_t> Devices = GetDevices(Program);
    assert(Devices.size() != 0 && "No devices in registerDeviceGlobals");
    auto Context = GetContext(Program);
    auto ContextInfo = getContextInfo(Context);
    // Only consulted by the assert below; keep release builds warning-free.
    [[maybe_unused]] auto ProgramInfo = getProgramInfo(Program);
    assert(ProgramInfo != nullptr && "unregistered program!");

    for (auto Device : Devices) {
        ManagedQueue Queue(Context, Device);

        // Initialise the out-parameters so they never hold indeterminate
        // values, whatever the callee writes on failure.
        size_t MetadataSize = 0;
        void *MetadataPtr = nullptr;
        auto Result =
            getContext()->urDdiTable.Program.pfnGetGlobalVariablePointer(
                Device, Program, kSPIR_MsanDeviceGlobalMetadata, &MetadataSize,
                &MetadataPtr);
        if (Result != UR_RESULT_SUCCESS) {
            getContext()->logger.info("No device globals");
            continue;
        }

        const uint64_t NumOfDeviceGlobal =
            MetadataSize / sizeof(DeviceGlobalInfo);
        assert((MetadataSize % sizeof(DeviceGlobalInfo) == 0) &&
               "DeviceGlobal metadata size is not correct");
        // Nothing to read: avoid touching an empty vector's storage and
        // avoid enqueueing a zero-byte copy.
        if (NumOfDeviceGlobal == 0) {
            getContext()->logger.info("No device globals");
            continue;
        }

        std::vector<DeviceGlobalInfo> GVInfos(NumOfDeviceGlobal);
        // Blocking copy (second argument `true`) of the metadata array from
        // the device into the host-side vector.
        Result = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy(
            Queue, true, GVInfos.data(), MetadataPtr,
            sizeof(DeviceGlobalInfo) * NumOfDeviceGlobal, 0, nullptr, nullptr);
        if (Result != UR_RESULT_SUCCESS) {
            getContext()->logger.error("Device Global[{}] Read Failed: {}",
                                       kSPIR_MsanDeviceGlobalMetadata, Result);
            return Result;
        }

        auto DeviceInfo = getMsanInterceptor()->getDeviceInfo(Device);
        for (const auto &GVInfo : GVInfos) {
            UR_CALL(DeviceInfo->Shadow->EnqueuePoisonShadow(Queue, GVInfo.Addr,
                                                            GVInfo.Size, 0));
            ContextInfo->MaxAllocatedSize =
                std::max(ContextInfo->MaxAllocatedSize, GVInfo.Size);
        }
    }

    return UR_RESULT_SUCCESS;
}

ur_result_t MsanInterceptor::insertContext(ur_context_handle_t Context,
std::shared_ptr<ContextInfo> &CI) {
std::scoped_lock<ur_shared_mutex> Guard(m_ContextMapMutex);
Expand Down Expand Up @@ -380,10 +454,14 @@ ur_result_t MsanInterceptor::prepareLaunch(
}

// Set LaunchInfo
auto ContextInfo = getContextInfo(LaunchInfo.Context);
LaunchInfo.Data->GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin;
LaunchInfo.Data->GlobalShadowOffsetEnd = DeviceInfo->Shadow->ShadowEnd;
LaunchInfo.Data->DeviceTy = DeviceInfo->Type;
LaunchInfo.Data->Debug = getOptions().Debug ? 1 : 0;
UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc(
ContextInfo->Handle, DeviceInfo->Handle, nullptr, nullptr,
ContextInfo->MaxAllocatedSize, &LaunchInfo.Data->CleanShadow));

getContext()->logger.info(
"launch_info {} (GlobalShadow={}, Device={}, Debug={})",
Expand Down Expand Up @@ -466,6 +544,11 @@ ur_result_t USMLaunchInfo::initialize() {
USMLaunchInfo::~USMLaunchInfo() {
[[maybe_unused]] ur_result_t Result;
if (Data) {
if (Data->CleanShadow) {
Result = getContext()->urDdiTable.USM.pfnFree(Context,
Data->CleanShadow);
assert(Result == UR_RESULT_SUCCESS);
}
Result = getContext()->urDdiTable.USM.pfnFree(Context, (void *)Data);
assert(Result == UR_RESULT_SUCCESS);
}
Expand Down
9 changes: 8 additions & 1 deletion source/loader/layers/sanitizer/msan/msan_interceptor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ struct ProgramInfo {

struct ContextInfo {
ur_context_handle_t Handle;
size_t MaxAllocatedSize = 1024;
std::atomic<int32_t> RefCount = 1;

std::vector<ur_device_handle_t> DeviceList;
Expand Down Expand Up @@ -159,6 +160,11 @@ struct USMLaunchInfo {
ur_result_t initialize();
};

// One entry of the device-global metadata array that
// MsanInterceptor::registerDeviceGlobals copies byte-for-byte from the device
// (the variable named by kSPIR_MsanDeviceGlobalMetadata). Because it is filled
// by a raw memory copy, the field order and widths must not be changed
// independently of whatever produces that metadata.
// NOTE(review): the producer is presumably the device-side instrumentation --
// confirm before altering the layout.
struct DeviceGlobalInfo {
// Size of the device global; used as the length of the shadow range.
uptr Size;
// Address of the device global; used as the start of the shadow range.
uptr Addr;
};

struct SpirKernelInfo {
uptr KernelName;
uptr Size;
Expand All @@ -174,7 +180,7 @@ class MsanInterceptor {
ur_device_handle_t Device,
const ur_usm_desc_t *Properties,
ur_usm_pool_handle_t Pool, size_t Size,
void **ResultPtr);
AllocType Type, void **ResultPtr);
ur_result_t releaseMemory(ur_context_handle_t Context, void *Ptr);

ur_result_t registerProgram(ur_program_handle_t Program);
Expand Down Expand Up @@ -261,6 +267,7 @@ class MsanInterceptor {
std::shared_ptr<msan::DeviceInfo> &DeviceInfo);

ur_result_t registerSpirKernels(ur_program_handle_t Program);
ur_result_t registerDeviceGlobals(ur_program_handle_t Program);

private:
std::unordered_map<ur_context_handle_t, std::shared_ptr<msan::ContextInfo>>
Expand Down
2 changes: 1 addition & 1 deletion source/loader/layers/sanitizer/msan/msan_libdevice.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ struct MsanLaunchInfo {

MsanErrorReport Report;

uint8_t CleanShadow[128] = {};
void *CleanShadow = nullptr;
};

// Based on the observation, only the last 24 bits of the address of the private
Expand Down
11 changes: 3 additions & 8 deletions source/loader/layers/sanitizer/msan/msan_shadow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,15 +227,10 @@ ur_result_t MsanShadowMemoryGPU::EnqueueMapShadow(
VirtualMemMaps[MappedPtr].first = PhysicalMem;
}

// We don't need to record virtual memory map for null pointer,
// since it doesn't have an alloc info.
if (Ptr == 0) {
continue;
auto AllocInfoItOp = getMsanInterceptor()->findAllocInfoByAddress(Ptr);
if (AllocInfoItOp) {
VirtualMemMaps[MappedPtr].second.insert((*AllocInfoItOp)->second);
}

auto AllocInfoIt = getMsanInterceptor()->findAllocInfoByAddress(Ptr);
assert(AllocInfoIt);
VirtualMemMaps[MappedPtr].second.insert((*AllocInfoIt)->second);
}

return UR_RESULT_SUCCESS;
Expand Down
Loading

0 comments on commit ac80ea5

Please sign in to comment.