Skip to content

Commit

Permalink
Add handles to opencl adapter
Browse files Browse the repository at this point in the history
  • Loading branch information
omarahmed1111 authored and RossBrunton committed Jan 29, 2025
1 parent 14f4a3b commit d160a92
Show file tree
Hide file tree
Showing 23 changed files with 1,919 additions and 1,055 deletions.
5 changes: 5 additions & 0 deletions source/adapters/opencl/adapter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "logger/ur_logger.hpp"
#include "platform.hpp"

#include "CL/cl.h"
#include "logger/ur_logger.hpp"
Expand All @@ -18,6 +20,9 @@ struct ur_adapter_handle_t_ {
std::mutex Mutex;
logger::Logger &log = logger::get_logger("opencl");

std::vector<std::unique_ptr<ur_platform_handle_t_>> URPlatforms;
uint32_t NumPlatforms = 0;

// Function pointers to core OpenCL entry points which may not exist in older
// versions of the OpenCL-ICD-Loader are tracked here and initialized by
// dynamically loading the symbol by name.
Expand Down
98 changes: 58 additions & 40 deletions source/adapters/opencl/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,18 @@

#include "command_buffer.hpp"
#include "common.hpp"
#include "context.hpp"
#include "event.hpp"
#include "kernel.hpp"
#include "memory.hpp"
#include "queue.hpp"

/// The ur_exp_command_buffer_handle_t_ destructor calls CL release
/// command-buffer to free the underlying object.
ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() {
urQueueRelease(hInternalQueue);

cl_context CLContext = cl_adapter::cast<cl_context>(hContext);
cl_context CLContext = hContext->CLContext;
cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr;
cl_int Res =
cl_ext::getExtFuncFromContext<decltype(clReleaseCommandBufferKHR)>(
Expand All @@ -36,7 +41,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
ur_queue_handle_t Queue = nullptr;
UR_RETURN_ON_FAILURE(urQueueCreate(hContext, hDevice, nullptr, &Queue));

cl_context CLContext = cl_adapter::cast<cl_context>(hContext);
cl_context CLContext = hContext->CLContext;
cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clCreateCommandBufferKHR)>(
Expand All @@ -47,7 +52,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
pCommandBufferDesc ? pCommandBufferDesc->isUpdatable : false;

ur_device_command_buffer_update_capability_flags_t UpdateCapabilities;
cl_device_id CLDevice = cl_adapter::cast<cl_device_id>(hDevice);
cl_device_id CLDevice = hDevice->CLDevice;
CL_RETURN_ON_FAILURE(
getDeviceCommandBufferUpdateCapabilities(CLDevice, UpdateCapabilities));
bool DeviceSupportsUpdate = UpdateCapabilities > 0;
Expand All @@ -61,16 +66,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp(
IsUpdatable ? CL_COMMAND_BUFFER_MUTABLE_KHR : 0u, 0};

cl_int Res = CL_SUCCESS;
auto CLCommandBuffer = clCreateCommandBufferKHR(
1, cl_adapter::cast<cl_command_queue *>(&Queue), Properties, &Res);
const cl_command_queue CLQueue = Queue->CLQueue;
auto CLCommandBuffer =
clCreateCommandBufferKHR(1, &CLQueue, Properties, &Res);
CL_RETURN_ON_FAILURE_AND_SET_NULL(Res, phCommandBuffer);

try {
auto URCommandBuffer = std::make_unique<ur_exp_command_buffer_handle_t_>(
Queue, hContext, hDevice, CLCommandBuffer, IsUpdatable);
*phCommandBuffer = URCommandBuffer.release();
} catch (...) {
} catch (std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}

CL_RETURN_ON_FAILURE(Res);
Expand All @@ -95,7 +103,7 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
UR_APIEXPORT ur_result_t UR_APICALL
urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) {
UR_ASSERT(!hCommandBuffer->IsFinalized, UR_RESULT_ERROR_INVALID_OPERATION);
cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;
cl_ext::clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clFinalizeCommandBufferKHR)>(
Expand Down Expand Up @@ -127,7 +135,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
UR_ASSERT(!(phCommandHandle && !hCommandBuffer->IsUpdatable),
UR_RESULT_ERROR_INVALID_OPERATION);

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;
cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clCommandNDRangeKernelKHR)>(
Expand All @@ -149,10 +157,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
cl_command_properties_khr *Properties =
hCommandBuffer->IsUpdatable ? UpdateProperties : nullptr;
CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR(
hCommandBuffer->CLCommandBuffer, nullptr, Properties,
cl_adapter::cast<cl_kernel>(hKernel), workDim, pGlobalWorkOffset,
pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList,
pSyncPointWaitList, pSyncPoint, OutCommandHandle));
hCommandBuffer->CLCommandBuffer, nullptr, Properties, hKernel->CLKernel,
workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize,
numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint,
OutCommandHandle));

try {
auto Handle = std::make_unique<ur_exp_command_buffer_command_handle_t_>(
Expand Down Expand Up @@ -212,18 +220,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp(
(void)phEventWaitList;
(void)phEvent;
(void)phCommand;
cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;
cl_ext::clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clCommandCopyBufferKHR)>(
CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferKHRCache,
cl_ext::CommandCopyBufferName, &clCommandCopyBufferKHR));

CL_RETURN_ON_FAILURE(clCommandCopyBufferKHR(
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList,
pSyncPoint, nullptr));
hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hSrcMem->CLMemory,
hDstMem->CLMemory, srcOffset, dstOffset, size, numSyncPointsInWaitList,
pSyncPointWaitList, pSyncPoint, nullptr));

return UR_RESULT_SUCCESS;
}
Expand All @@ -250,19 +257,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp(
size_t OpenCLDstRect[3]{dstOrigin.x, dstOrigin.y, dstOrigin.z};
size_t OpenCLRegion[3]{region.width, region.height, region.depth};

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;
cl_ext::clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clCommandCopyBufferRectKHR)>(
CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferRectKHRCache,
cl_ext::CommandCopyBufferRectName, &clCommandCopyBufferRectKHR));

CL_RETURN_ON_FAILURE(clCommandCopyBufferRectKHR(
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
cl_adapter::cast<cl_mem>(hSrcMem), cl_adapter::cast<cl_mem>(hDstMem),
OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, srcRowPitch, srcSlicePitch,
dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList,
pSyncPoint, nullptr));
hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hSrcMem->CLMemory,
hDstMem->CLMemory, OpenCLOriginRect, OpenCLDstRect, OpenCLRegion,
srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch,
numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr));

return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -354,17 +360,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp(
[[maybe_unused]] ur_event_handle_t *phEvent,
[[maybe_unused]] ur_exp_command_buffer_command_handle_t *phCommand) {

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;
cl_ext::clCommandFillBufferKHR_fn clCommandFillBufferKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clCommandFillBufferKHR)>(
CLContext, cl_ext::ExtFuncPtrCache->clCommandFillBufferKHRCache,
cl_ext::CommandFillBufferName, &clCommandFillBufferKHR));

CL_RETURN_ON_FAILURE(clCommandFillBufferKHR(
hCommandBuffer->CLCommandBuffer, nullptr, nullptr,
cl_adapter::cast<cl_mem>(hBuffer), pPattern, patternSize, offset, size,
numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr));
hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hBuffer->CLMemory,
pPattern, patternSize, offset, size, numSyncPointsInWaitList,
pSyncPointWaitList, pSyncPoint, nullptr));

return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -420,21 +426,34 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList,
ur_event_handle_t *phEvent) {

cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;
cl_ext::clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = nullptr;
UR_RETURN_ON_FAILURE(
cl_ext::getExtFuncFromContext<decltype(clEnqueueCommandBufferKHR)>(
CLContext, cl_ext::ExtFuncPtrCache->clEnqueueCommandBufferKHRCache,
cl_ext::EnqueueCommandBufferName, &clEnqueueCommandBufferKHR));

const uint32_t NumberOfQueues = 1;

cl_event Event;
std::vector<cl_event> CLWaitEvents(numEventsInWaitList);
for (uint32_t i = 0; i < numEventsInWaitList; i++) {
CLWaitEvents[i] = phEventWaitList[i]->CLEvent;
}
cl_command_queue CLQueue = hQueue->CLQueue;
CL_RETURN_ON_FAILURE(clEnqueueCommandBufferKHR(
NumberOfQueues, cl_adapter::cast<cl_command_queue *>(&hQueue),
hCommandBuffer->CLCommandBuffer, numEventsInWaitList,
cl_adapter::cast<const cl_event *>(phEventWaitList),
cl_adapter::cast<cl_event *>(phEvent)));

NumberOfQueues, &CLQueue, hCommandBuffer->CLCommandBuffer,
numEventsInWaitList, CLWaitEvents.data(), &Event));
if (phEvent) {
try {
auto UREvent =
std::make_unique<ur_event_handle_t_>(Event, hQueue->Context, hQueue);
*phEvent = UREvent.release();
} catch (std::bad_alloc &) {
return UR_RESULT_ERROR_OUT_OF_RESOURCES;
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}
}
return UR_RESULT_SUCCESS;
}

Expand Down Expand Up @@ -474,11 +493,11 @@ void updateKernelArgs(std::vector<cl_mutable_dispatch_arg_khr> &CLArgs,
for (uint32_t i = 0; i < NumMemobjArgs; i++) {
const ur_exp_command_buffer_update_memobj_arg_desc_t &URMemObjArg =
ArgMemobjList[i];
cl_mem arg_value = URMemObjArg.hNewMemObjArg->CLMemory;
cl_mutable_dispatch_arg_khr CLArg{
URMemObjArg.argIndex, // arg_index
sizeof(cl_mem), // arg_size
cl_adapter::cast<const cl_mem *>(
&URMemObjArg.hNewMemObjArg) // arg_value
&arg_value // arg_value
};

CLArgs.push_back(CLArg);
Expand Down Expand Up @@ -513,7 +532,7 @@ ur_result_t validateCommandDesc(
// Verify that the device supports updating the aspects of the kernel that
// the user is requesting.
ur_device_handle_t URDevice = Command->hCommandBuffer->hDevice;
cl_device_id CLDevice = cl_adapter::cast<cl_device_id>(URDevice);
cl_device_id CLDevice = URDevice->CLDevice;

ur_device_command_buffer_update_capability_flags_t UpdateCapabilities = 0;
CL_RETURN_ON_FAILURE(
Expand Down Expand Up @@ -564,7 +583,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
UR_RETURN_ON_FAILURE(validateCommandDesc(hCommand, pUpdateKernelLaunch));

ur_exp_command_buffer_handle_t hCommandBuffer = hCommand->hCommandBuffer;
cl_context CLContext = cl_adapter::cast<cl_context>(hCommandBuffer->hContext);
cl_context CLContext = hCommandBuffer->hContext->CLContext;

cl_ext::clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr;
UR_RETURN_ON_FAILURE(
Expand Down Expand Up @@ -608,8 +627,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
updateNDRange(CLLocalWorkSize, LocalWorkSizePtr);
}

cl_mutable_command_khr command =
cl_adapter::cast<cl_mutable_command_khr>(hCommand->CLMutableCommand);
cl_mutable_command_khr command = hCommand->CLMutableCommand;
cl_mutable_dispatch_config_khr dispatch_config = {
command,
static_cast<cl_uint>(CLArgs.size()), // num_args
Expand Down
14 changes: 0 additions & 14 deletions source/adapters/opencl/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,20 +158,6 @@ extern thread_local char ErrorMessage[MaxMessageSize];
ur_result_t ErrorCode);

[[noreturn]] void die(const char *Message);

/// Converts \p Value from type \p From to type \p To, rejecting at compile
/// time any conversion that does not look like a plain handle/value
/// re-labelling.
///
/// \tparam To   Destination type.
/// \tparam From Source type (deduced from \p Value).
/// \param Value The value to convert.
/// \return \p Value converted to \p To: via reinterpret_cast when \p From is a
///         pointer type, via static_cast otherwise.
template <class To, class From> To cast(From Value) {
  if constexpr (!std::is_pointer_v<From>) {
    // Non-pointer source: only same-width, same-signedness conversions are
    // accepted, so the value can never be truncated or change sign.
    static_assert(sizeof(From) == sizeof(To), "Cast failed size check");
    static_assert(std::is_signed_v<From> == std::is_signed_v<To>,
                  "Cast failed sign check");
    return static_cast<To>(Value);
  } else {
    // Pointer source: the destination has to be a pointer as well.
    static_assert(std::is_pointer_v<To>, "Cast failed pointer check");
    return reinterpret_cast<To>(Value);
  }
}
} // namespace cl_adapter

namespace cl_ext {
Expand Down
Loading

0 comments on commit d160a92

Please sign in to comment.