diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index f36f329e39..d500c2e4be 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -46,7 +46,7 @@ commandHandleReleaseInternal(ur_exp_command_buffer_command_handle_t Command) { ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { urQueueRelease(hInternalQueue); - cl_context CLContext = cl_adapter::cast(hContext); + cl_context CLContext = hContext->get(); cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr; cl_int Res = cl_ext::getExtFuncFromContext( @@ -77,7 +77,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( pCommandBufferDesc ? pCommandBufferDesc->isUpdatable : false; ur_device_command_buffer_update_capability_flags_t UpdateCapabilities; - cl_device_id CLDevice = cl_adapter::cast(hDevice); + cl_device_id CLDevice = hDevice->get(); CL_RETURN_ON_FAILURE( getDeviceCommandBufferUpdateCapabilities(CLDevice, UpdateCapabilities)); bool DeviceSupportsUpdate = UpdateCapabilities > 0; @@ -91,9 +91,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( IsUpdatable ? CL_COMMAND_BUFFER_MUTABLE_KHR : 0u, 0}; cl_int Res = CL_SUCCESS; - cl_command_queue CLQueue = Queue->get(); - auto CLCommandBuffer = clCreateCommandBufferKHR( - 1, CLQueue, Properties, &Res); + const cl_command_queue CLQueue = Queue->get(); + auto CLCommandBuffer = + clCreateCommandBufferKHR(1, &CLQueue, Properties, &Res); CL_RETURN_ON_FAILURE_AND_SET_NULL(Res, phCommandBuffer); try { @@ -178,10 +178,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( cl_command_properties_khr *Properties = hCommandBuffer->IsUpdatable ? UpdateProperties : nullptr; CL_RETURN_ON_FAILURE(clCommandNDRangeKernelKHR( - hCommandBuffer->CLCommandBuffer, nullptr, Properties, - hKernel->get(), workDim, pGlobalWorkOffset, - pGlobalWorkSize, pLocalWorkSize, numSyncPointsInWaitList, - pSyncPointWaitList, pSyncPoint, OutCommandHandle)); + hCommandBuffer->CLCommandBuffer, nullptr, Properties, hKernel->get(), + workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, + OutCommandHandle)); try { auto URCommandHandle = @@ -237,10 +237,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( cl_ext::CommandCopyBufferName, &clCommandCopyBufferKHR)); CL_RETURN_ON_FAILURE(clCommandCopyBufferKHR( - hCommandBuffer->CLCommandBuffer, nullptr, nullptr, - hSrcMem->get(), hDstMem->get(), - srcOffset, dstOffset, size, numSyncPointsInWaitList, pSyncPointWaitList, - pSyncPoint, nullptr)); + hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hSrcMem->get(), + hDstMem->get(), srcOffset, dstOffset, size, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint, nullptr)); return UR_RESULT_SUCCESS; } @@ -271,11 +270,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( cl_ext::CommandCopyBufferRectName, &clCommandCopyBufferRectKHR)); CL_RETURN_ON_FAILURE(clCommandCopyBufferRectKHR( - hCommandBuffer->CLCommandBuffer, nullptr, nullptr, - hSrcMem->get(), hDstMem->get(), - OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, srcRowPitch, srcSlicePitch, - dstRowPitch, dstSlicePitch, numSyncPointsInWaitList, pSyncPointWaitList, - pSyncPoint, nullptr)); + hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hSrcMem->get(), + hDstMem->get(), OpenCLOriginRect, OpenCLDstRect, OpenCLRegion, + srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, + numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr)); return UR_RESULT_SUCCESS; } @@ -355,9 +353,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( cl_ext::CommandFillBufferName, &clCommandFillBufferKHR)); CL_RETURN_ON_FAILURE(clCommandFillBufferKHR( - hCommandBuffer->CLCommandBuffer, nullptr, nullptr, - hBuffer->get(), pPattern, patternSize, offset, size, - numSyncPointsInWaitList, pSyncPointWaitList, pSyncPoint, nullptr)); + hCommandBuffer->CLCommandBuffer, nullptr, nullptr, hBuffer->get(), + pPattern, patternSize, offset, size, numSyncPointsInWaitList, + pSyncPointWaitList, pSyncPoint, nullptr)); return UR_RESULT_SUCCESS; } @@ -481,11 +479,11 @@ void updateKernelArgs(std::vector &CLArgs, for (uint32_t i = 0; i < NumMemobjArgs; i++) { const ur_exp_command_buffer_update_memobj_arg_desc_t &URMemObjArg = ArgMemobjList[i]; + cl_mem arg_value = URMemObjArg.hNewMemObjArg->get(); cl_mutable_dispatch_arg_khr CLArg{ URMemObjArg.argIndex, // arg_index sizeof(cl_mem), // arg_size - cl_adapter::cast( - &URMemObjArg.hNewMemObjArg) // arg_value + &arg_value // arg_value }; CLArgs.push_back(CLArg); @@ -516,7 +514,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( } ur_exp_command_buffer_handle_t hCommandBuffer = hCommand->hCommandBuffer; - cl_context CLContext = cl_adapter::cast(hCommandBuffer->hContext); + cl_context CLContext = hCommandBuffer->hContext->get(); cl_ext::clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr; UR_RETURN_ON_FAILURE( @@ -566,8 +564,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( updateNDRange(CLLocalWorkSize, LocalWorkSizePtr); } - cl_mutable_command_khr command = - cl_adapter::cast(hCommand->CLMutableCommand); + cl_mutable_command_khr command = hCommand->CLMutableCommand; cl_mutable_dispatch_config_khr dispatch_config = { command, static_cast(CLArgs.size()), // num_args diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index de8b766272..ba4c4ead69 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -158,20 +158,6 @@ extern thread_local char ErrorMessage[MaxMessageSize]; ur_result_t ErrorCode); [[noreturn]] void die(const char *Message); - -template To cast(From Value) { - - if constexpr (std::is_pointer_v) { - static_assert(std::is_pointer_v == std::is_pointer_v, - "Cast failed pointer check"); - return reinterpret_cast(Value); - } else { - static_assert(sizeof(From) == sizeof(To), "Cast failed size check"); - static_assert(std::is_signed_v == std::is_signed_v, - "Cast failed sign check"); - return static_cast(Value); - } -} } // namespace cl_adapter namespace cl_ext { diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index 8ec5fa3b2b..ae77169817 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -25,9 +25,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreate( } try { - cl_context Ctx = clCreateContext( - nullptr, cl_adapter::cast(DeviceCount), CLDevices.data(), - nullptr, nullptr, cl_adapter::cast(&Ret)); + cl_context Ctx = clCreateContext(nullptr, static_cast(DeviceCount), + CLDevices.data(), nullptr, nullptr, + static_cast(&Ret)); CL_RETURN_ON_FAILURE(Ret); auto URContext = std::make_unique(Ctx, DeviceCount, phDevices); @@ -84,22 +84,19 @@ urContextRelease(ur_context_handle_t hContext) { static std::mutex contextReleaseMutex; auto clContext = hContext->get(); - { - std::lock_guard lock(contextReleaseMutex); - size_t refCount = 0; - CL_RETURN_ON_FAILURE(clGetContextInfo(clContext, CL_CONTEXT_REFERENCE_COUNT, - sizeof(size_t), &refCount, nullptr)); - - // ExtFuncPtrCache is destroyed in an atexit() callback, so it doesn't - // necessarily outlive the adapter (or all the contexts). - if (refCount == 1 && cl_ext::ExtFuncPtrCache) { - cl_ext::ExtFuncPtrCache->clearCache(clContext); - } + std::lock_guard lock(contextReleaseMutex); + size_t refCount = hContext->getReferenceCount(); + // ExtFuncPtrCache is destroyed in an atexit() callback, so it doesn't + // necessarily outlive the adapter (or all the contexts). + if (refCount == 1 && cl_ext::ExtFuncPtrCache) { + cl_ext::ExtFuncPtrCache->clearCache(clContext); } - CL_RETURN_ON_FAILURE( - clReleaseContext(hContext->get())); - + if (hContext->decrementReferenceCount() == 0) { + delete hContext; + } else { + CL_RETURN_ON_FAILURE(clReleaseContext(hContext->get())); + } return UR_RESULT_SUCCESS; } @@ -118,8 +115,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextGetNativeHandle( } UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( - ur_native_handle_t hNativeContext, ur_adapter_handle_t, uint32_t, - const ur_device_handle_t *, + ur_native_handle_t hNativeContext, ur_adapter_handle_t, uint32_t numDevices, + const ur_device_handle_t *phDevices, const ur_context_native_properties_t *pProperties, ur_context_handle_t *phContext) { diff --git a/source/adapters/opencl/context.hpp b/source/adapters/opencl/context.hpp index 555636b1b8..66ab0e2518 100644 --- a/source/adapters/opencl/context.hpp +++ b/source/adapters/opencl/context.hpp @@ -40,9 +40,6 @@ struct ur_context_handle_t_ { static ur_result_t makeWithNative(native_type Ctx, uint32_t DevCount, const ur_device_handle_t *phDevices, ur_context_handle_t &Context) { - if (!phDevices) { - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - } try { uint32_t CLDeviceCount; CL_RETURN_ON_FAILURE(clGetContextInfo(Ctx, CL_CONTEXT_NUM_DEVICES, @@ -63,6 +60,7 @@ struct ur_context_handle_t_ { auto URContext = std::make_unique(Ctx, DevCount, phDevices); Context = URContext.release(); + CL_RETURN_ON_FAILURE(clRetainContext(Ctx)); } catch (std::bad_alloc &) { return UR_RESULT_ERROR_OUT_OF_RESOURCES; } catch (...) { diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index d4c716de5e..f32d8b50c0 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -13,65 +13,6 @@ #include #include -ur_result_t cl_adapter::getDeviceVersion(cl_device_id Dev, - oclv::OpenCLVersion &Version) { - - size_t DevVerSize = 0; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(Dev, CL_DEVICE_VERSION, 0, nullptr, &DevVerSize)); - - std::string DevVer(DevVerSize, '\0'); - CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_VERSION, DevVerSize, - DevVer.data(), nullptr)); - - Version = oclv::OpenCLVersion(DevVer); - if (!Version.isValid()) { - return UR_RESULT_ERROR_INVALID_DEVICE; - } - - return UR_RESULT_SUCCESS; -} - -static bool isIntelFPGAEmuDevice(cl_device_id Dev) { - size_t NameSize = 0; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(Dev, CL_DEVICE_NAME, 0, nullptr, &NameSize)); - std::string NameStr(NameSize, '\0'); - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(Dev, CL_DEVICE_NAME, NameSize, NameStr.data(), nullptr)); - - return NameStr.find("Intel(R) FPGA Emulation Device") != std::string::npos; -} - -ur_result_t cl_adapter::checkDeviceExtensions( - cl_device_id Dev, const std::vector &Exts, bool &Supported) { - size_t ExtSize = 0; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); - - std::string ExtStr(ExtSize, '\0'); - - CL_RETURN_ON_FAILURE(clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, ExtSize, - ExtStr.data(), nullptr)); - - Supported = true; - for (const std::string &Ext : Exts) { - if (!(Supported = (ExtStr.find(Ext) != std::string::npos))) { - // The Intel FPGA emulation device does actually support these, even if it - // doesn't report them. - if (isIntelFPGAEmuDevice(Dev) && - (Ext == "cl_intel_device_attribute_query" || - Ext == "cl_intel_required_subgroup_size")) { - Supported = true; - continue; - } - break; - } - } - - return UR_RESULT_SUCCESS; -} - UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform, ur_device_type_t DeviceType, uint32_t NumEntries, @@ -372,9 +313,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_DEVICE_ID: { bool Supported = false; - UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - hDevice->get(), {"cl_khr_pci_bus_info"}, - Supported)); + UR_RETURN_ON_FAILURE( + hDevice->checkDeviceExtensions({"cl_khr_pci_bus_info"}, Supported)); if (!Supported) { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; @@ -389,8 +329,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION: { oclv::OpenCLVersion Version; - UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - hDevice->get(), Version)); + UR_RETURN_ON_FAILURE(hDevice->getDeviceVersion(Version)); const std::string Results = std::to_string(Version.getMajor()) + "." + std::to_string(Version.getMinor()); @@ -494,8 +433,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* Corresponding OpenCL query is only available starting with OpenCL 2.1 * and we have to emulate it on older OpenCL runtimes. */ oclv::OpenCLVersion DevVer; - UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - hDevice->get(), DevVer)); + UR_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); if (DevVer >= oclv::V2_1) { cl_uint CLValue; @@ -524,8 +462,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * UR type: ur_device_fp_capability_flags_t */ if (propName == UR_DEVICE_INFO_HALF_FP_CONFIG) { bool Supported; - UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - hDevice->get(), {"cl_khr_fp16"}, Supported)); + UR_RETURN_ON_FAILURE( + hDevice->checkDeviceExtensions({"cl_khr_fp16"}, Supported)); if (!Supported) { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; @@ -544,8 +482,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* This query is missing before OpenCL 3.0. Check version and handle * appropriately */ oclv::OpenCLVersion DevVer; - UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - hDevice->get(), DevVer)); + UR_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); /* Minimum required capability to be returned. For OpenCL 1.2, this is all * that is required */ @@ -603,8 +540,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; oclv::OpenCLVersion DevVer; - UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - hDevice->get(), DevVer)); + UR_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); cl_device_atomic_capabilities CLCapabilities; if (DevVer >= oclv::V3_0) { @@ -656,8 +592,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL; oclv::OpenCLVersion DevVer; - UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - hDevice->get(), DevVer)); + UR_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); cl_device_atomic_capabilities CLCapabilities; if (DevVer >= oclv::V3_0) { @@ -705,8 +640,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP; oclv::OpenCLVersion DevVer; - UR_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( - hDevice->get(), DevVer)); + UR_RETURN_ON_FAILURE(hDevice->getDeviceVersion(DevVer)); auto convertCapabilities = [](cl_device_atomic_capabilities CLCapabilities) { @@ -749,7 +683,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, // not return an error if the query is unsuccessful as this is expected // of an OpenCL 1.2 driver. cl_device_atomic_capabilities CLCapabilities; - if (CL_SUCCESS == clGetDeviceInfo(cl_adapter::cast(hDevice), + if (CL_SUCCESS == clGetDeviceInfo(hDevice->get(), CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr)) { @@ -772,8 +706,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_ATOMIC_64: { bool Supported = false; - UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - hDevice->get(), + UR_RETURN_ON_FAILURE(hDevice->checkDeviceExtensions( {"cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics"}, Supported)); @@ -790,8 +723,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT: { bool Supported = false; - UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - hDevice->get(), + UR_RETURN_ON_FAILURE(hDevice->checkDeviceExtensions( {"cl_intel_mem_channel_property"}, Supported)); return ReturnValue(Supported); @@ -823,15 +755,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED: { bool Supported = false; - UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - hDevice->get(), + UR_RETURN_ON_FAILURE(hDevice->checkDeviceExtensions( {"cl_intel_program_scope_host_pipe"}, Supported)); return ReturnValue(Supported); } case UR_DEVICE_INFO_GLOBAL_VARIABLE_SUPPORT: { bool Supported = false; - UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - hDevice->get(), + UR_RETURN_ON_FAILURE(hDevice->checkDeviceExtensions( {"cl_intel_global_variable_access"}, Supported)); return ReturnValue(Supported); } @@ -862,14 +792,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* CL type: cl_bitfield / enum * UR type: ur_flags_t (uint32_t) */ bool Supported = false; - UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), + UR_RETURN_ON_FAILURE(hDevice->checkDeviceExtensions( {"cl_intel_unified_shared_memory"}, Supported)); if (Supported) { cl_bitfield CLValue = 0; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, - sizeof(cl_bitfield), &CLValue, nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + hDevice->get(), CLPropName, sizeof(cl_bitfield), &CLValue, nullptr)); return ReturnValue(static_cast(CLValue)); } else { return ReturnValue(0); @@ -964,25 +892,22 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, } case UR_DEVICE_INFO_IP_VERSION: { bool Supported; - UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( - hDevice->get(), + UR_RETURN_ON_FAILURE(hDevice->checkDeviceExtensions( {"cl_intel_device_attribute_query"}, Supported)); if (!Supported) { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(hDevice->get(), CLPropName, - propSize, pPropValue, pPropSizeRet)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CLPropName, propSize, + pPropValue, pPropSizeRet)); return UR_RESULT_SUCCESS; } case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: { bool isExtensionSupported; - if (cl_adapter::checkDeviceExtensions( - hDevice->get(), - {"cl_intel_required_subgroup_size"}, - isExtensionSupported) != UR_RESULT_SUCCESS || + if (hDevice->checkDeviceExtensions({"cl_intel_required_subgroup_size"}, + isExtensionSupported) != + UR_RESULT_SUCCESS || !isExtensionSupported) { std::vector aThreadIsItsOwnSubGroup({1}); return ReturnValue(aThreadIsItsOwnSubGroup.data(), @@ -991,13 +916,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, // Have to convert size_t to uint32_t size_t SubGroupSizesSize = 0; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(hDevice->get(), CLPropName, 0, - nullptr, &SubGroupSizesSize)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CLPropName, 0, nullptr, + &SubGroupSizesSize)); std::vector SubGroupSizes(SubGroupSizesSize / sizeof(size_t)); - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(hDevice->get(), CLPropName, - SubGroupSizesSize, SubGroupSizes.data(), nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CLPropName, + SubGroupSizesSize, + SubGroupSizes.data(), nullptr)); return ReturnValue.template operator()(SubGroupSizes.data(), SubGroupSizes.size()); } @@ -1021,23 +945,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_UUID: { // Use the cl_khr_device_uuid extension, if available. bool isKhrDeviceUuidSupported = false; - if (cl_adapter::checkDeviceExtensions( - cl_adapter::cast(hDevice), {"cl_khr_device_uuid"}, - isKhrDeviceUuidSupported) != UR_RESULT_SUCCESS || + if (hDevice->checkDeviceExtensions({"cl_khr_device_uuid"}, + isKhrDeviceUuidSupported) != + UR_RESULT_SUCCESS || !isKhrDeviceUuidSupported) { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } static_assert(CL_UUID_SIZE_KHR == 16); std::array UUID{}; - CL_RETURN_ON_FAILURE( - clGetDeviceInfo(cl_adapter::cast(hDevice), - CL_DEVICE_UUID_KHR, UUID.size(), UUID.data(), nullptr)); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(hDevice->get(), CL_DEVICE_UUID_KHR, + UUID.size(), UUID.data(), nullptr)); return ReturnValue(UUID); } case UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS: { return ReturnValue(false); } + case UR_DEVICE_INFO_REFERENCE_COUNT: { + return ReturnValue(hDevice->getReferenceCount()); + } + case UR_DEVICE_INFO_PLATFORM: { + return ReturnValue(hDevice->Platform); + } + case UR_DEVICE_INFO_PARENT_DEVICE: { + return ReturnValue(hDevice->ParentDevice); + } /* TODO: Check regularly to see if support is enabled in OpenCL. Intel GPU * EU device-specific information extensions. Some of the queries are * enabled by cl_intel_device_attribute_query extension, but it's not yet in @@ -1076,7 +1008,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, std::string::npos); } case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { - cl_device_id Dev = cl_adapter::cast(hDevice); + cl_device_id Dev = hDevice->get(); ur_device_command_buffer_update_capability_flags_t UpdateCapabilities = 0; CL_RETURN_ON_FAILURE( getDeviceCommandBufferUpdateCapabilities(Dev, UpdateCapabilities)); diff --git a/source/adapters/opencl/device.hpp b/source/adapters/opencl/device.hpp index 22f554ce73..7420f58665 100644 --- a/source/adapters/opencl/device.hpp +++ b/source/adapters/opencl/device.hpp @@ -59,6 +59,17 @@ struct ur_device_handle_t_ { return UR_RESULT_SUCCESS; } + bool isIntelFPGAEmuDevice() { + size_t NameSize = 0; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(Device, CL_DEVICE_NAME, 0, nullptr, &NameSize)); + std::string NameStr(NameSize, '\0'); + CL_RETURN_ON_FAILURE(clGetDeviceInfo(Device, CL_DEVICE_NAME, NameSize, + NameStr.data(), nullptr)); + + return NameStr.find("Intel(R) FPGA Emulation Device") != std::string::npos; + } + ur_result_t checkDeviceExtensions(const std::vector &Exts, bool &Supported) { size_t ExtSize = 0; @@ -73,6 +84,14 @@ struct ur_device_handle_t_ { Supported = true; for (const std::string &Ext : Exts) { if (!(Supported = (ExtStr.find(Ext) != std::string::npos))) { + // The Intel FPGA emulation device does actually support these, even if + // it doesn't report them. + if (isIntelFPGAEmuDevice() && + (Ext == "cl_intel_device_attribute_query" || + Ext == "cl_intel_required_subgroup_size")) { + Supported = true; + continue; + } break; } } diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 379c125e8f..02c451e249 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -529,18 +529,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( ur_event_handle_t *phEvent) { cl_context Ctx = hQueue->Context->get(); - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } cl_ext::clEnqueueWriteGlobalVariable_fn F = nullptr; UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( Ctx, cl_ext::ExtFuncPtrCache->clEnqueueWriteGlobalVariableCache, cl_ext::EnqueueWriteGlobalVariableName, &F)); - Res = F(hQueue->get(), - hProgram->get(), name, blockingWrite, count, - offset, pSrc, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); - + cl_int Res = + F(hQueue->get(), hProgram->get(), name, blockingWrite, count, offset, + pSrc, numEventsInWaitList, CLWaitEvents.data(), &Event); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return mapCLErrorToUR(Res); } @@ -551,18 +563,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( ur_event_handle_t *phEvent) { cl_context Ctx = hQueue->Context->get(); - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } cl_ext::clEnqueueReadGlobalVariable_fn F = nullptr; UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( Ctx, cl_ext::ExtFuncPtrCache->clEnqueueReadGlobalVariableCache, cl_ext::EnqueueReadGlobalVariableName, &F)); - Res = F(hQueue->get(), - hProgram, name, blockingRead, count, - offset, pDst, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent)); + cl_int Res = + F(hQueue->get(), hProgram->get(), name, blockingRead, count, offset, pDst, + numEventsInWaitList, CLWaitEvents.data(), &Event); + if (phEvent) { + try { + auto UREvent = + std::make_unique(Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } return mapCLErrorToUR(Res); } @@ -573,7 +598,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( ur_event_handle_t *phEvent) { cl_context CLContext = hQueue->Context->get(); - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } cl_ext::clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( @@ -581,12 +610,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( cl_ext::EnqueueReadHostPipeName, &FuncPtr)); if (FuncPtr) { - CL_RETURN_ON_FAILURE( - FuncPtr(hQueue->get(), - hProgram->get(), pipe_symbol, blocking, - pDst, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + CL_RETURN_ON_FAILURE(FuncPtr(hQueue->get(), hProgram->get(), pipe_symbol, + blocking, pDst, size, numEventsInWaitList, + CLWaitEvents.data(), &Event)); + + if (phEvent) { + try { + auto UREvent = std::make_unique( + Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } } return UR_RESULT_SUCCESS; @@ -599,7 +637,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( ur_event_handle_t *phEvent) { cl_context CLContext = hQueue->Context->get(); - + cl_event Event; + std::vector CLWaitEvents(numEventsInWaitList); + for (uint32_t i = 0; i < numEventsInWaitList; i++) { + CLWaitEvents[i] = phEventWaitList[i]->get(); + } cl_ext::clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( @@ -607,12 +649,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( cl_ext::EnqueueWriteHostPipeName, &FuncPtr)); if (FuncPtr) { - CL_RETURN_ON_FAILURE( - FuncPtr(hQueue->get(), - hProgram->get(), pipe_symbol, blocking, - pSrc, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + CL_RETURN_ON_FAILURE(FuncPtr(hQueue->get(), hProgram->get(), pipe_symbol, + blocking, pSrc, size, numEventsInWaitList, + CLWaitEvents.data(), &Event)); + if (phEvent) { + try { + auto UREvent = std::make_unique( + Event, hQueue->Context, hQueue); + *phEvent = UREvent.release(); + } catch (std::bad_alloc &) { + return UR_RESULT_ERROR_OUT_OF_RESOURCES; + } catch (...) { + return UR_RESULT_ERROR_UNKNOWN; + } + } } return UR_RESULT_SUCCESS; diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp index ae60a93a1d..a03ecbb7eb 100644 --- a/source/adapters/opencl/event.cpp +++ b/source/adapters/opencl/event.cpp @@ -193,16 +193,19 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, *pPropSizeRet = CheckPropSize; } - if (pPropValue) { - if (propName == UR_EVENT_INFO_COMMAND_TYPE) { - *reinterpret_cast(pPropValue) = convertCLCommandTypeToUR( - *reinterpret_cast(pPropValue)); - } else if (propName == UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) { - const auto param_value_int = static_cast(pPropValue); - if (*param_value_int < 0) { - // This can contain a negative return code to signify that the command - // terminated in an unexpected way. - *param_value_int = UR_EVENT_STATUS_ERROR; + if (pPropValue) { + if (propName == UR_EVENT_INFO_COMMAND_TYPE) { + *reinterpret_cast(pPropValue) = + convertCLCommandTypeToUR( + *reinterpret_cast(pPropValue)); + } else if (propName == UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) { + const auto param_value_int = + static_cast(pPropValue); + if (*param_value_int < 0) { + // This can contain a negative return code to signify that the command + // terminated in an unexpected way. + *param_value_int = UR_EVENT_STATUS_ERROR; + } } } } diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 43229584f7..52e24770a5 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -12,6 +12,7 @@ #include "device.hpp" #include "memory.hpp" #include "program.hpp" +#include "queue.hpp" #include "sampler.hpp" #include @@ -42,7 +43,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgValue( const ur_kernel_arg_value_properties_t *, const void *pArgValue) { CL_RETURN_ON_FAILURE(clSetKernelArg( - hKernel->get(), cl_adapter::cast(argIndex), argSize, pArgValue)); + hKernel->get(), static_cast(argIndex), argSize, pArgValue)); return UR_RESULT_SUCCESS; } @@ -52,7 +53,7 @@ urKernelSetArgLocal(ur_kernel_handle_t hKernel, uint32_t argIndex, size_t argSize, const ur_kernel_arg_local_properties_t *) { CL_RETURN_ON_FAILURE(clSetKernelArg( - hKernel->get(), cl_adapter::cast(argIndex), argSize, nullptr)); + hKernel->get(), static_cast(argIndex), argSize, nullptr)); return UR_RESULT_SUCCESS; } @@ -84,20 +85,35 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { + + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + switch (propName) { // OpenCL doesn't have a way to support this. - if (propName == UR_KERNEL_INFO_NUM_REGS) { + case UR_KERNEL_INFO_NUM_REGS: { return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } - size_t CheckPropSize = 0; - cl_int ClResult = clGetKernelInfo(hKernel->get(), - mapURKernelInfoToCL(propName), propSize, - pPropValue, &CheckPropSize); - if (pPropValue && CheckPropSize != propSize) { - return UR_RESULT_ERROR_INVALID_SIZE; + case UR_KERNEL_INFO_PROGRAM: { + return ReturnValue(hKernel->Program); + } + case UR_KERNEL_INFO_CONTEXT: { + return ReturnValue(hKernel->Context); } - CL_RETURN_ON_FAILURE(ClResult); - if (pPropSizeRet) { - *pPropSizeRet = CheckPropSize; + case UR_KERNEL_INFO_REFERENCE_COUNT: { + return ReturnValue(hKernel->getReferenceCount()); + } + default: { + size_t CheckPropSize = 0; + cl_int ClResult = + clGetKernelInfo(hKernel->get(), mapURKernelInfoToCL(propName), propSize, + pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } } } @@ -356,9 +372,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( cl_ext::SetKernelArgMemPointerName, &FuncPtr)); if (FuncPtr) { - CL_RETURN_ON_FAILURE(FuncPtr(hKernel->get(), - cl_adapter::cast(argIndex), - pArgValue)); + CL_RETURN_ON_FAILURE( + FuncPtr(hKernel->get(), static_cast(argIndex), pArgValue)); } return UR_RESULT_SUCCESS; @@ -402,7 +417,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgMemObj( cl_mem CLArgValue = hArgValue ? hArgValue->get() : nullptr; CL_RETURN_ON_FAILURE(clSetKernelArg(hKernel->get(), - cl_adapter::cast(argIndex), + static_cast(argIndex), sizeof(CLArgValue), &CLArgValue)); return UR_RESULT_SUCCESS; } @@ -412,9 +427,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgSampler( const ur_kernel_arg_sampler_properties_t *, ur_sampler_handle_t hArgValue) { cl_sampler CLArgSampler = hArgValue->get(); - cl_int RetErr = - clSetKernelArg(hKernel->get(), cl_adapter::cast(argIndex), - sizeof(CLArgSampler), &CLArgSampler); + cl_int RetErr = clSetKernelArg(hKernel->get(), static_cast(argIndex), + sizeof(CLArgSampler), &CLArgSampler); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; } @@ -427,8 +441,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize( cl_platform_id Platform; CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CL_QUEUE_DEVICE, - sizeof(cl_device_id), &Device, nullptr)); + hQueue->get(), CL_QUEUE_DEVICE, sizeof(cl_device_id), &Device, nullptr)); CL_RETURN_ON_FAILURE(clGetDeviceInfo( Device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &Platform, nullptr)); @@ -441,8 +454,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; CL_RETURN_ON_FAILURE(GetKernelSuggestedLocalWorkSizeFuncPtr( - cl_adapter::cast(hQueue), - cl_adapter::cast(hKernel), workDim, pGlobalWorkOffset, + hQueue->get(), hKernel->get(), workDim, pGlobalWorkOffset, pGlobalWorkSize, pSuggestedLocalWorkSize)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index 1222e845e2..064feba00d 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -121,8 +121,7 @@ cl_image_format mapURImageFormatToCL(const ur_image_format_t *PImageFormat) { cl_image_desc mapURImageDescToCL(const ur_image_desc_t *PImageDesc) { cl_image_desc CLImageDesc; - CLImageDesc.image_type = - cl_adapter::cast(PImageDesc->type); + CLImageDesc.image_type = static_cast(PImageDesc->type); switch (PImageDesc->type) { case UR_MEM_TYPE_IMAGE2D: @@ -259,7 +258,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( try { cl_mem Buffer = FuncPtr( CLContext, PropertiesIntel.data(), static_cast(flags), - size, pProperties->pHost, cl_adapter::cast(&RetErr)); + size, pProperties->pHost, static_cast(&RetErr)); CL_RETURN_ON_FAILURE(RetErr); auto URMem = std::make_unique(Buffer, hContext); *phBuffer = URMem.release(); @@ -276,7 +275,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( try { cl_mem Buffer = clCreateBuffer(hContext->get(), static_cast(flags), size, - HostPtr, cl_adapter::cast(&RetErr)); + HostPtr, static_cast(&RetErr)); CL_RETURN_ON_FAILURE(RetErr); auto URMem = std::make_unique(Buffer, hContext); *phBuffer = URMem.release(); @@ -303,7 +302,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreate( try { cl_mem Mem = clCreateImage(hContext->get(), MapFlags, &ImageFormat, &ImageDesc, - pHost, cl_adapter::cast(&RetErr)); + pHost, static_cast(&RetErr)); CL_RETURN_ON_FAILURE(RetErr); auto URMem = std::make_unique(Mem, hContext); *phMem = URMem.release(); @@ -338,7 +337,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( try { cl_mem Buffer = clCreateSubBuffer( hBuffer->get(), static_cast(flags), BufferCreateType, - &BufferRegion, cl_adapter::cast(&RetErr)); + &BufferRegion, static_cast(&RetErr)); if (RetErr == CL_INVALID_VALUE) { size_t BufferSize = 0; CL_RETURN_ON_FAILURE(clGetMemObjectInfo(hBuffer->get(), CL_MEM_SIZE, diff --git a/source/adapters/opencl/platform.cpp b/source/adapters/opencl/platform.cpp index 242c56f199..526246f49a 100644 --- a/source/adapters/opencl/platform.cpp +++ b/source/adapters/opencl/platform.cpp @@ -43,10 +43,8 @@ urPlatformGetInfo(ur_platform_handle_t hPlatform, ur_platform_info_t propName, case UR_PLATFORM_INFO_VERSION: case UR_PLATFORM_INFO_EXTENSIONS: case UR_PLATFORM_INFO_PROFILE: { - cl_platform_id Plat = nullptr; - if (hPlatform) { - Plat = hPlatform->get(); - } + cl_platform_id Plat = hPlatform->get(); + CL_RETURN_ON_FAILURE( clGetPlatformInfo(Plat, CLPropName, propSize, pPropValue, pSizeRet)); @@ -75,16 +73,16 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, std::call_once( InitFlag, - [](cl_int &Result) { - Result = clGetPlatformIDs(0, nullptr, &NumPlatforms); - if (Result != CL_SUCCESS) { - return Result; + [](cl_int &Res) { + Res = clGetPlatformIDs(0, nullptr, &NumPlatforms); + if (Res != CL_SUCCESS) { + return Res; } std::vector CLPlatforms(NumPlatforms); - Result = clGetPlatformIDs(cl_adapter::cast(NumPlatforms), - CLPlatforms.data(), nullptr); - if (Result != CL_SUCCESS) { - return Result; + Res = clGetPlatformIDs(static_cast(NumPlatforms), + CLPlatforms.data(), nullptr); + if (Res != CL_SUCCESS) { + return Res; } try { for (uint32_t i = 0; i < NumPlatforms; i++) { @@ -97,7 +95,7 @@ urPlatformGet(ur_adapter_handle_t *, uint32_t, uint32_t NumEntries, } catch (...) { return CL_INVALID_PLATFORM; } - return Result; + return Res; }, Result); diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 9fb4254674..08ec3f78d4 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -116,8 +116,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary( cl_int CLResult; try { cl_program Program = clCreateProgramWithBinary( - hContext->get(), cl_adapter::cast(1u), Devices, Lengths, - &pBinary, BinaryStatus, &CLResult); + hContext->get(), static_cast(1u), Devices, Lengths, &pBinary, + BinaryStatus, &CLResult); CL_RETURN_ON_FAILURE(CLResult); auto URProgram = std::make_unique(Program, hContext); *phProgram = URProgram.release(); @@ -136,8 +136,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile([[maybe_unused]] ur_context_handle_t hContext, ur_program_handle_t hProgram, const char *pOptions) { - std::unique_ptr> DevicesInProgram; - UR_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); + uint32_t DeviceCount = hProgram->Context->DeviceCount; + std::vector CLDevicesInProgram(DeviceCount); + for (uint32_t i = 0; i < DeviceCount; i++) { + CLDevicesInProgram[i] = hProgram->Context->Devices[i]->get(); + } CL_RETURN_ON_FAILURE(clCompileProgram(hProgram->get(), DeviceCount, CLDevicesInProgram.data(), pOptions, 0, @@ -218,12 +221,15 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild([[maybe_unused]] ur_context_handle_t hContext, ur_program_handle_t hProgram, const char *pOptions) { - std::unique_ptr> DevicesInProgram; - UR_RETURN_ON_FAILURE(getDevicesFromProgram(hProgram, DevicesInProgram)); + uint32_t DeviceCount = hProgram->Context->DeviceCount; + std::vector CLDevicesInProgram(DeviceCount); + for (uint32_t i = 0; i < DeviceCount; i++) { + CLDevicesInProgram[i] = hProgram->Context->Devices[i]->get(); + } - CL_RETURN_ON_FAILURE(clBuildProgram( - hProgram->get(), DevicesInProgram->size(), - DevicesInProgram->data(), pOptions, nullptr, nullptr)); + CL_RETURN_ON_FAILURE( + clBuildProgram(hProgram->get(), CLDevicesInProgram.size(), + CLDevicesInProgram.data(), pOptions, nullptr, nullptr)); return UR_RESULT_SUCCESS; } @@ -238,9 +244,9 @@ urProgramLink(ur_context_handle_t hContext, uint32_t count, CLPrograms[i] = phPrograms[i]->get(); } cl_program Program = clLinkProgram( - hContext->get(), 0, nullptr, pOptions, cl_adapter::cast(count), + hContext->get(), 0, nullptr, pOptions, static_cast(count), CLPrograms.data(), nullptr, nullptr, &CLResult); - + if (CL_INVALID_BINARY == CLResult) { // Some OpenCL drivers incorrectly return CL_INVALID_BINARY here, convert it // to CL_LINK_PROGRAM_FAILURE diff --git a/source/adapters/opencl/queue.hpp b/source/adapters/opencl/queue.hpp index e44af5f4d9..6ea50402a9 100644 --- a/source/adapters/opencl/queue.hpp +++ b/source/adapters/opencl/queue.hpp @@ -34,9 +34,6 @@ struct ur_queue_handle_t_ { ur_context_handle_t Context, ur_device_handle_t Device, ur_queue_handle_t &Queue) { - if (!Context || !Device) { - return UR_RESULT_ERROR_INVALID_NULL_HANDLE; - } try { cl_context CLContext; CL_RETURN_ON_FAILURE(clGetCommandQueueInfo(NativeQueue, CL_QUEUE_CONTEXT, diff --git a/source/adapters/opencl/sampler.cpp b/source/adapters/opencl/sampler.cpp index f663429bfe..9dee3065f3 100644 --- a/source/adapters/opencl/sampler.cpp +++ b/source/adapters/opencl/sampler.cpp @@ -166,16 +166,25 @@ urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, cl_sampler_info SamplerInfo = ur2CLSamplerInfo(propName); static_assert(sizeof(cl_addressing_mode) == sizeof(ur_sampler_addressing_mode_t)); + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); ur_result_t Err = UR_RESULT_SUCCESS; + + switch (propName) { + case UR_SAMPLER_INFO_CONTEXT: { + return ReturnValue(hSampler->Context); + } + case UR_SAMPLER_INFO_REFERENCE_COUNT: { + return ReturnValue(hSampler->getReferenceCount()); + } // ur_bool_t have a size of uint8_t, but cl_bool size have the size of // uint32_t so this adjust UR_SAMPLER_INFO_NORMALIZED_COORDS info to map // between them. - if (propName == UR_SAMPLER_INFO_NORMALIZED_COORDS) { + case UR_SAMPLER_INFO_NORMALIZED_COORDS: { cl_bool normalized_coords = false; - Err = mapCLErrorToUR( - clGetSamplerInfo(hSampler->get(), SamplerInfo, - sizeof(cl_bool), &normalized_coords, nullptr)); + Err = mapCLErrorToUR(clGetSamplerInfo(hSampler->get(), SamplerInfo, + sizeof(cl_bool), &normalized_coords, + nullptr)); if (pPropValue && propSize != sizeof(ur_bool_t)) { return UR_RESULT_ERROR_INVALID_SIZE; } @@ -187,18 +196,7 @@ urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, if (pPropSizeRet) { *pPropSizeRet = sizeof(ur_bool_t); } - } else { - size_t CheckPropSize = 0; - Err = mapCLErrorToUR( - clGetSamplerInfo(hSampler->get(), SamplerInfo, - propSize, pPropValue, &CheckPropSize)); - if (pPropValue && CheckPropSize != propSize) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - UR_RETURN_ON_FAILURE(Err); - if (pPropSizeRet) { - *pPropSizeRet = CheckPropSize; - } + break; } default: { size_t CheckPropSize = 0; diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index beea4aee88..cd0d4e3b37 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -211,7 +211,7 @@ urUSMSharedAlloc(ur_context_handle_t Context, ur_device_handle_t hDevice, cl_int ClResult = CL_SUCCESS; Ptr = FuncPtr(CLContext, hDevice->get(), AllocProperties.empty() ? nullptr : AllocProperties.data(), - size, Alignment, cl_adapter::cast(&ClResult)); + size, Alignment, static_cast(&ClResult)); if (ClResult == CL_INVALID_BUFFER_SIZE) { return UR_RESULT_ERROR_INVALID_USM_SIZE; } diff --git a/test/conformance/context/context_adapter_opencl.match b/test/conformance/context/context_adapter_opencl.match new file mode 100644 index 0000000000..869f11cd42 --- /dev/null +++ b/test/conformance/context/context_adapter_opencl.match @@ -0,0 +1 @@ +urContextCreateWithNativeHandleTest.SuccessWithUnOwnedNativeHandle/Intel_R__OpenCL___{{.*}}_