diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index d015548f01..61e5ad9966 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -12,6 +12,7 @@ #include "common.hpp" #include "context.hpp" #include "event.hpp" +#include "kernel.hpp" #include "memory.hpp" #include "queue.hpp" @@ -120,10 +121,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( return UR_RESULT_ERROR_INVALID_OPERATION; CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR( - hCommandBuffer->CLCommandBuffer, nullptr, nullptr, - cl_adapter::cast(hKernel), workDim, pGlobalWorkOffset, - pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint, nullptr)); + hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hKernel->get(), + workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 13e952d1a4..ce2481f1a6 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -11,6 +11,7 @@ #include "common.hpp" #include "context.hpp" #include "event.hpp" +#include "kernel.hpp" #include "memory.hpp" #include "program.hpp" #include "queue.hpp" @@ -40,10 +41,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( for (uint32_t i = 0; i < numEventsInWaitList; i++) { CLWaitEvents[i] = phEventWaitList[i]->get(); } - CL_RETURN_ON_FAILURE(clEnqueueNDRangeKernel( - hQueue->get(), cl_adapter::cast(hKernel), workDim, - pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, numEventsInWaitList, - CLWaitEvents.data(), &Event)); + CL_RETURN_ON_FAILURE( + clEnqueueNDRangeKernel(hQueue->get(), hKernel->get(), workDim, + pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, + numEventsInWaitList, CLWaitEvents.data(), &Event)); if (phEvent) { auto UREvent = std::make_unique(Event, hQueue->Context, hQueue); diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 440c981030..8993ee693f 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -7,6 +7,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +#include "kernel.hpp" #include "common.hpp" #include "device.hpp" #include "memory.hpp" @@ -21,9 +22,11 @@ urKernelCreate(ur_program_handle_t hProgram, const char *pKernelName, ur_kernel_handle_t *phKernel) { cl_int CLResult; - *phKernel = cl_adapter::cast( - clCreateKernel(hProgram->get(), pKernelName, &CLResult)); + cl_kernel Kernel = clCreateKernel(hProgram->get(), pKernelName, &CLResult); CL_RETURN_ON_FAILURE(CLResult); + auto URKernel = std::make_unique(Kernel, hProgram, + hProgram->Context); + *phKernel = URKernel.release(); return UR_RESULT_SUCCESS; } @@ -31,9 +34,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, const ur_kernel_arg_value_properties_t *, const void *pArgValue) { - CL_RETURN_ON_FAILURE(clSetKernelArg(cl_adapter::cast(hKernel), - cl_adapter::cast(argIndex), - argSize, pArgValue)); + CL_RETURN_ON_FAILURE(clSetKernelArg( + hKernel->get(), cl_adapter::cast(argIndex), argSize, pArgValue)); return UR_RESULT_SUCCESS; } @@ -42,9 +44,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, const ur_kernel_arg_local_properties_t *) { - CL_RETURN_ON_FAILURE(clSetKernelArg(cl_adapter::cast(hKernel), - cl_adapter::cast(argIndex), - argSize, nullptr)); + CL_RETURN_ON_FAILURE(clSetKernelArg( + hKernel->get(), cl_adapter::cast(argIndex), argSize, nullptr)); return UR_RESULT_SUCCESS; } @@ -76,6 +77,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); // We need this little bit of ugliness because the UR NUM_ARGS property is // size_t whereas the CL one is cl_uint. We should consider changing that see // #1038 @@ -83,7 +85,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, if (pPropSizeRet) *pPropSizeRet = sizeof(size_t); cl_uint NumArgs = 0; - CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), + CL_RETURN_ON_FAILURE(clGetKernelInfo(hKernel->get(), mapURKernelInfoToCL(propName), sizeof(NumArgs), &NumArgs, nullptr)); if (pPropValue) { @@ -91,11 +93,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, return UR_RESULT_ERROR_INVALID_SIZE; *static_cast(pPropValue) = static_cast(NumArgs); } + } else if (propName == UR_KERNEL_INFO_PROGRAM) { + return ReturnValue(hKernel->Program); + } else if (propName == UR_KERNEL_INFO_CONTEXT) { + return ReturnValue(hKernel->Context); } else { size_t CheckPropSize = 0; - cl_int ClResult = clGetKernelInfo(cl_adapter::cast(hKernel), - mapURKernelInfoToCL(propName), propSize, - pPropValue, &CheckPropSize); + cl_int ClResult = + clGetKernelInfo(hKernel->get(), mapURKernelInfoToCL(propName), propSize, + pPropValue, &CheckPropSize); if (pPropValue && CheckPropSize != propSize) { return UR_RESULT_ERROR_INVALID_SIZE; } @@ -147,8 +153,8 @@ urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, } } CL_RETURN_ON_FAILURE(clGetKernelWorkGroupInfo( - cl_adapter::cast(hKernel), hDevice->get(), - mapURKernelGroupInfoToCL(propName), propSize, pPropValue, pPropSizeRet)); + hKernel->get(), hDevice->get(), mapURKernelGroupInfoToCL(propName), + propSize, pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } @@ -201,9 +207,8 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, } cl_int Ret = clGetKernelSubGroupInfo( - cl_adapter::cast(hKernel), hDevice->get(), - mapURKernelSubGroupInfoToCL(propName), InputValueSize, InputValue.get(), - sizeof(size_t), &RetVal, pPropSizeRet); + hKernel->get(), hDevice->get(), mapURKernelSubGroupInfoToCL(propName), + InputValueSize, InputValue.get(), sizeof(size_t), &RetVal, pPropSizeRet); if (Ret == CL_INVALID_OPERATION) { // clGetKernelSubGroupInfo returns CL_INVALID_OPERATION if the device does @@ -252,13 +257,13 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, } UR_APIEXPORT ur_result_t UR_APICALL urKernelRetain(ur_kernel_handle_t hKernel) { - CL_RETURN_ON_FAILURE(clRetainKernel(cl_adapter::cast(hKernel))); + CL_RETURN_ON_FAILURE(clRetainKernel(hKernel->get())); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urKernelRelease(ur_kernel_handle_t hKernel) { - CL_RETURN_ON_FAILURE(clReleaseKernel(cl_adapter::cast(hKernel))); + CL_RETURN_ON_FAILURE(clReleaseKernel(hKernel->get())); return UR_RESULT_SUCCESS; } @@ -276,19 +281,18 @@ static ur_result_t usmSetIndirectAccess(ur_kernel_handle_t hKernel) { /* We test that each alloc type is supported before we actually try to set * KernelExecInfo. */ - CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), - CL_KERNEL_CONTEXT, sizeof(cl_context), - &CLContext, nullptr)); + CL_RETURN_ON_FAILURE(clGetKernelInfo(hKernel->get(), CL_KERNEL_CONTEXT, + sizeof(cl_context), &CLContext, + nullptr)); UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, cl_ext::HostMemAllocName, &HFunc)); if (HFunc) { - CL_RETURN_ON_FAILURE( - clSetKernelExecInfo(cl_adapter::cast(hKernel), - CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal)); + CL_RETURN_ON_FAILURE(clSetKernelExecInfo( + hKernel->get(), CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL, + sizeof(cl_bool), &TrueVal)); } UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( @@ -296,10 +300,9 @@ static ur_result_t usmSetIndirectAccess(ur_kernel_handle_t hKernel) { cl_ext::DeviceMemAllocName, &DFunc)); if (DFunc) { - CL_RETURN_ON_FAILURE( - clSetKernelExecInfo(cl_adapter::cast(hKernel), - CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal)); + CL_RETURN_ON_FAILURE(clSetKernelExecInfo( + hKernel->get(), CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL, + sizeof(cl_bool), &TrueVal)); } UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( @@ -307,10 +310,9 @@ static ur_result_t usmSetIndirectAccess(ur_kernel_handle_t hKernel) { cl_ext::SharedMemAllocName, &SFunc)); if (SFunc) { - CL_RETURN_ON_FAILURE( - clSetKernelExecInfo(cl_adapter::cast(hKernel), - CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, - sizeof(cl_bool), &TrueVal)); + CL_RETURN_ON_FAILURE(clSetKernelExecInfo( + hKernel->get(), CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL, + sizeof(cl_bool), &TrueVal)); } return UR_RESULT_SUCCESS; } @@ -332,9 +334,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( return UR_RESULT_SUCCESS; } case UR_KERNEL_EXEC_INFO_USM_PTRS: { - CL_RETURN_ON_FAILURE(clSetKernelExecInfo( - cl_adapter::cast(hKernel), - CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, propSize, pPropValue)); + CL_RETURN_ON_FAILURE(clSetKernelExecInfo(hKernel->get(), + CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, + propSize, pPropValue)); return UR_RESULT_SUCCESS; } default: { @@ -348,9 +350,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( const ur_kernel_arg_pointer_properties_t *, const void *pArgValue) { cl_context CLContext; - CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), - CL_KERNEL_CONTEXT, sizeof(cl_context), - &CLContext, nullptr)); + CL_RETURN_ON_FAILURE(clGetKernelInfo(hKernel->get(), CL_KERNEL_CONTEXT, + sizeof(cl_context), &CLContext, + nullptr)); clSetKernelArgMemPointerINTEL_fn FuncPtr = nullptr; UR_RETURN_ON_FAILURE( @@ -364,9 +366,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( * deref the arg to get the pointer value */ auto PtrToPtr = reinterpret_cast(pArgValue); auto DerefPtr = reinterpret_cast(*PtrToPtr); - CL_RETURN_ON_FAILURE(FuncPtr(cl_adapter::cast(hKernel), - cl_adapter::cast(argIndex), - DerefPtr)); + CL_RETURN_ON_FAILURE( + FuncPtr(hKernel->get(), cl_adapter::cast(argIndex), DerefPtr)); } return UR_RESULT_SUCCESS; @@ -374,15 +375,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( ur_kernel_handle_t hKernel, ur_native_handle_t *phNativeKernel) { - *phNativeKernel = reinterpret_cast(hKernel); + *phNativeKernel = reinterpret_cast(hKernel->get()); return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( - ur_native_handle_t hNativeKernel, ur_context_handle_t, ur_program_handle_t, + ur_native_handle_t hNativeKernel, ur_context_handle_t hContext, + ur_program_handle_t hProgram, const ur_kernel_native_properties_t *pProperties, ur_kernel_handle_t *phKernel) { - *phKernel = reinterpret_cast(hNativeKernel); + cl_kernel NativeHandle = reinterpret_cast(hNativeKernel); + auto URKernel = + std::make_unique(NativeHandle, hProgram, hContext); + UR_RETURN_ON_FAILURE(URKernel->initWithNative()); + *phKernel = URKernel.release(); + if (!pProperties || !pProperties->isNativeHandleOwned) { return urKernelRetain(*phKernel); } @@ -394,7 +401,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( const ur_kernel_arg_mem_obj_properties_t *, ur_mem_handle_t hArgValue) { cl_mem CLArgValue = hArgValue ? hArgValue->get() : nullptr; - CL_RETURN_ON_FAILURE(clSetKernelArg(cl_adapter::cast(hKernel), + CL_RETURN_ON_FAILURE(clSetKernelArg(hKernel->get(), cl_adapter::cast(argIndex), sizeof(CLArgValue), &CLArgValue)); return UR_RESULT_SUCCESS; @@ -405,9 +412,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( const ur_kernel_arg_sampler_properties_t *, ur_sampler_handle_t hArgValue) { cl_sampler CLArgSampler = hArgValue->get(); - cl_int RetErr = clSetKernelArg(cl_adapter::cast(hKernel), - cl_adapter::cast(argIndex), - sizeof(CLArgSampler), &CLArgSampler); + cl_int RetErr = + clSetKernelArg(hKernel->get(), cl_adapter::cast(argIndex), + sizeof(CLArgSampler), &CLArgSampler); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/kernel.hpp b/source/adapters/opencl/kernel.hpp new file mode 100644 index 0000000000..3323fb68c7 --- /dev/null +++ b/source/adapters/opencl/kernel.hpp @@ -0,0 +1,54 @@ +//===--------- kernel.hpp - OpenCL Adapter ---------------------------===// +// +// Copyright (C) 2023 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "common.hpp" +#include "context.hpp" + +#include + +struct ur_kernel_handle_t_ { + using native_type = cl_kernel; + native_type Kernel; + ur_program_handle_t Program; + ur_context_handle_t Context; + + ur_kernel_handle_t_(native_type Kernel, ur_program_handle_t Program, + ur_context_handle_t Context) + : Kernel(Kernel), Program(Program), Context(Context) {} + + ~ur_kernel_handle_t_() {} + + ur_result_t initWithNative() { + if (!Program) { + cl_program CLProgram; + CL_RETURN_ON_FAILURE(clGetKernelInfo( + Kernel, CL_KERNEL_PROGRAM, sizeof(CLProgram), &CLProgram, nullptr)); + ur_native_handle_t NativeProgram = + reinterpret_cast(CLProgram); + UR_RETURN_ON_FAILURE(urProgramCreateWithNativeHandle( + NativeProgram, nullptr, nullptr, &Program)); + } + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetKernelInfo( + Kernel, CL_KERNEL_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); + if (!Context) { + ur_native_handle_t NativeContext = + reinterpret_cast(CLContext); + UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( + NativeContext, 0, nullptr, nullptr, &Context)); + } else if (Context->get() != CLContext) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + return UR_RESULT_SUCCESS; + } + + native_type get() { return Kernel; } +}; diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index f633d109a6..6e2054c546 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -226,8 +226,8 @@ cl_map_flags convertURMemFlagsToCL(ur_mem_flags_t URFlags) { UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( ur_context_handle_t hContext, ur_mem_flags_t flags, size_t size, const ur_buffer_properties_t *pProperties, ur_mem_handle_t *phBuffer) { - cl_int RetErr = CL_INVALID_OPERATION; + UR_RETURN_ON_FAILURE(urContextRetain(hContext)); if (pProperties) { // TODO: need to check if all properties are supported by OpenCL RT and // ignore unsupported diff --git a/source/adapters/opencl/platform.cpp b/source/adapters/opencl/platform.cpp index ddeb276870..8fa7056bcb 100644 --- a/source/adapters/opencl/platform.cpp +++ b/source/adapters/opencl/platform.cpp @@ -87,7 +87,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, ur_platform_handle_t *phPlatforms, uint32_t *pNumPlatforms) { - static std::vector> URPlatforms; + static std::vector URPlatforms; static std::once_flag InitFlag; static uint32_t NumPlatforms = 0; cl_int Result = CL_SUCCESS; @@ -105,10 +105,10 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, if (Result != CL_SUCCESS) { return Result; } - URPlatforms.resize(NumPlatforms); for (uint32_t i = 0; i < NumPlatforms; i++) { - URPlatforms[i] = + auto URPlatform = std::make_unique(CLPlatforms[i]); + URPlatforms.emplace_back(URPlatform.release()); } return Result; }, @@ -126,7 +126,7 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, } if (NumEntries && phPlatforms) { for (uint32_t i = 0; i < NumEntries; i++) { - phPlatforms[i] = URPlatforms[i].get(); + phPlatforms[i] = URPlatforms[i]; } } return mapCLErrorToUR(Result); diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index a647cd64be..5c326d8097 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -351,6 +351,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( auto URProgram = std::make_unique(NativeHandle, hContext); + UR_RETURN_ON_FAILURE(URProgram->initWithNative()); *phProgram = URProgram.release(); if (!pProperties || !pProperties->isNativeHandleOwned) { return urProgramRetain(*phProgram); diff --git a/source/adapters/opencl/program.hpp b/source/adapters/opencl/program.hpp index 84f486b7e8..5c40cdc0b2 100644 --- a/source/adapters/opencl/program.hpp +++ b/source/adapters/opencl/program.hpp @@ -23,5 +23,18 @@ struct ur_program_handle_t_ { ~ur_program_handle_t_() {} + ur_result_t initWithNative() { + if (!Context) { + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetProgramInfo( + Program, CL_PROGRAM_CONTEXT, sizeof(CLContext), &CLContext, nullptr)); + ur_native_handle_t NativeContext = + reinterpret_cast(CLContext); + UR_RETURN_ON_FAILURE(urContextCreateWithNativeHandle( + NativeContext, 0, nullptr, nullptr, &Context)); + } + return UR_RESULT_SUCCESS; + } + native_type get() { return Program; } }; diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index 241936a6ad..ea541b0cb9 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -559,7 +559,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( default: return UR_RESULT_ERROR_INVALID_VALUE; } - + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + if (propName == UR_USM_ALLOC_INFO_DEVICE) { + return ReturnValue(Context->Devices[0]); + } size_t CheckPropSize = 0; cl_int ClErr = GetMemAllocInfo(Context->get(), pMem, PropNameCL, propSize, pPropValue, &CheckPropSize);