diff --git a/include/ur_api.h b/include/ur_api.h index 205c10863c..81d7a92c01 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -433,10 +433,6 @@ typedef enum ur_function_t { UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP = 245, /// Enumerator for ::urEnqueueEventsWaitWithBarrierExt UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT = 246, - /// Enumerator for ::urTensorMapEncodeIm2ColExp - UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP = 247, - /// Enumerator for ::urTensorMapEncodeTiledExp - UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP = 248, /// Enumerator for ::urPhysicalMemGetInfo UR_FUNCTION_PHYSICAL_MEM_GET_INFO = 249, /// @cond @@ -12133,258 +12129,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp( /// array. ur_event_handle_t *phEvent); -#if !defined(__GNUC__) -#pragma endregion -#endif -// Intel 'oneAPI' Unified Runtime Experimental API for mapping tensor objects -#if !defined(__GNUC__) -#pragma region tensor_map_(experimental) -#endif -/////////////////////////////////////////////////////////////////////////////// -/// @brief Handle of tensor map object -typedef struct ur_exp_tensor_map_handle_t_ *ur_exp_tensor_map_handle_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Tensor map data type -typedef uint32_t ur_exp_tensor_map_data_type_flags_t; -typedef enum ur_exp_tensor_map_data_type_flag_t { - /// 1 byte - UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8 = UR_BIT(0), - /// 2 bytes - UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16 = UR_BIT(1), - /// 4 bytes - UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32 = UR_BIT(2), - /// 4 bytes - UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32 = UR_BIT(3), - /// 8 bytes - UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64 = UR_BIT(4), - /// 8 bytes - UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64 = UR_BIT(5), - /// 2 bytes - UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16 = UR_BIT(6), - /// 4 bytes - UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32 = UR_BIT(7), - /// 8 bytes - UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64 = UR_BIT(8), - /// 2 bytes - UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16 = UR_BIT(9), - /// 4 bytes - UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ = UR_BIT(10), - /// 4 bytes - UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32 = UR_BIT(11), - /// 4 bytes - UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ = UR_BIT(12), - /// @cond - UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FORCE_UINT32 = 0x7fffffff - /// @endcond - -} ur_exp_tensor_map_data_type_flag_t; -/// @brief Bit Mask for validating ur_exp_tensor_map_data_type_flags_t -#define UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK 0xffffe000 - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Tensor map interleave -typedef uint32_t ur_exp_tensor_map_interleave_flags_t; -typedef enum ur_exp_tensor_map_interleave_flag_t { - /// No interleave - UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE = UR_BIT(0), - /// 16B interleave - UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B = UR_BIT(1), - /// 32B interleave - UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B = UR_BIT(2), - /// @cond - UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_FORCE_UINT32 = 0x7fffffff - /// @endcond - -} ur_exp_tensor_map_interleave_flag_t; -/// @brief Bit Mask for validating ur_exp_tensor_map_interleave_flags_t -#define UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK 0xfffffff8 - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Tensor map l2 promotion -typedef uint32_t ur_exp_tensor_map_l2_promotion_flags_t; -typedef enum ur_exp_tensor_map_l2_promotion_flag_t { - /// No promotion type - UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE = UR_BIT(0), - /// 64B promotion type - UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B = UR_BIT(1), - /// 128B promotion type - UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B = UR_BIT(2), - /// 256B promotion type - UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B = UR_BIT(3), - /// @cond - UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_FORCE_UINT32 = 0x7fffffff - /// @endcond - -} ur_exp_tensor_map_l2_promotion_flag_t; -/// @brief Bit Mask for validating ur_exp_tensor_map_l2_promotion_flags_t -#define UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK 0xfffffff0 - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Tensor map swizzle -typedef uint32_t ur_exp_tensor_map_swizzle_flags_t; -typedef enum ur_exp_tensor_map_swizzle_flag_t { - /// No swizzle - UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE = UR_BIT(0), - /// 32B swizzle - UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B = UR_BIT(1), - /// 64B swizzle - UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B = UR_BIT(2), - /// 128B swizzle - UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B = UR_BIT(3), - /// @cond - UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_FORCE_UINT32 = 0x7fffffff - /// @endcond - -} ur_exp_tensor_map_swizzle_flag_t; -/// @brief Bit Mask for validating ur_exp_tensor_map_swizzle_flags_t -#define UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK 0xfffffff0 - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Tensor map OOB fill -typedef uint32_t ur_exp_tensor_map_oob_fill_flags_t; -typedef enum ur_exp_tensor_map_oob_fill_flag_t { - /// No OOB fill - UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE = UR_BIT(0), - /// Refer to NVIDIA docs - UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA = UR_BIT(1), - /// @cond - UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_FORCE_UINT32 = 0x7fffffff - /// @endcond - -} ur_exp_tensor_map_oob_fill_flag_t; -/// @brief Bit Mask for validating ur_exp_tensor_map_oob_fill_flags_t -#define UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK 0xfffffffc - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Encode tensor map with image data -/// -/// @details -/// - Map encode using im2col. -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_UNINITIALIZED -/// - ::UR_RESULT_ERROR_DEVICE_LOST -/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC -/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE -/// + `NULL == hDevice` -/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType` -/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave` -/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle` -/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion` -/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill` -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == GlobalAddress` -/// + `NULL == GlobalDim` -/// + `NULL == GlobalStrides` -/// + `NULL == PixelBoxLowerCorner` -/// + `NULL == PixelBoxUpperCorner` -/// + `NULL == ElementStrides` -/// + `NULL == hTensorMap` -/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT -/// + `TensorRank < 3` -UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( - /// [in] Handle of the device object. - ur_device_handle_t hDevice, - /// [in] Data type of the tensor object. - ur_exp_tensor_map_data_type_flags_t TensorMapType, - /// [in] Dimensionality of tensor; must be at least 3. - uint32_t TensorRank, - /// [in] Starting address of memory region described by tensor. - void *GlobalAddress, - /// [in] Array containing tensor size (number of elements) along each of - /// the TensorRank dimensions. - const uint64_t *GlobalDim, - /// [in] Array containing stride size (in bytes) along each of the - /// TensorRank - 1 dimensions. - const uint64_t *GlobalStrides, - /// [in] Array containing DHW dimensions of lower box corner. - const int *PixelBoxLowerCorner, - /// [in] Array containing DHW dimensions of upper box corner. - const int *PixelBoxUpperCorner, - /// [in] Number of channels per pixel. - uint32_t ChannelsPerPixel, - /// [in] Number of pixels per column. - uint32_t PixelsPerColumn, - /// [in] Array containing traversal stride in each of the TensorRank - /// dimensions. - const uint32_t *ElementStrides, - /// [in] Type of interleaved layout the tensor addresses - ur_exp_tensor_map_interleave_flags_t Interleave, - /// [in] Bank swizzling pattern inside shared memory - ur_exp_tensor_map_swizzle_flags_t Swizzle, - /// [in] L2 promotion size. - ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, - /// [in] Indicates whether zero or special NaN constant will be used to - /// fill out-of-bounds elements. - ur_exp_tensor_map_oob_fill_flags_t OobFill, - /// [out] Handle of the tensor map object. - ur_exp_tensor_map_handle_t *hTensorMap); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Encode tensor map with tiled data -/// -/// @details -/// - Tiled map encode. -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_UNINITIALIZED -/// - ::UR_RESULT_ERROR_DEVICE_LOST -/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC -/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE -/// + `NULL == hDevice` -/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType` -/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave` -/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle` -/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion` -/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill` -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == GlobalAddress` -/// + `NULL == GlobalDim` -/// + `NULL == GlobalStrides` -/// + `NULL == BoxDim` -/// + `NULL == ElementStrides` -/// + `NULL == hTensorMap` -/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT -/// + `TensorRank < 3` -UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp( - /// [in] Handle of the device object. - ur_device_handle_t hDevice, - /// [in] Data type of the tensor object. - ur_exp_tensor_map_data_type_flags_t TensorMapType, - /// [in] Dimensionality of tensor; must be at least 3. - uint32_t TensorRank, - /// [in] Starting address of memory region described by tensor. - void *GlobalAddress, - /// [in] Array containing tensor size (number of elements) along each of - /// the TensorRank dimensions. - const uint64_t *GlobalDim, - /// [in] Array containing stride size (in bytes) along each of the - /// TensorRank - 1 dimensions. - const uint64_t *GlobalStrides, - /// [in] Array containing traversal box size (number of elments) along - /// each of the TensorRank dimensions. Specifies how many elements to be - /// traversed along each tensor dimension. - const uint32_t *BoxDim, - /// [in] Array containing traversal stride in each of the TensorRank - /// dimensions. - const uint32_t *ElementStrides, - /// [in] Type of interleaved layout the tensor addresses - ur_exp_tensor_map_interleave_flags_t Interleave, - /// [in] Bank swizzling pattern inside shared memory - ur_exp_tensor_map_swizzle_flags_t Swizzle, - /// [in] L2 promotion size. - ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, - /// [in] Indicates whether zero or special NaN constant will be used to - /// fill out-of-bounds elements. - ur_exp_tensor_map_oob_fill_flags_t OobFill, - /// [out] Handle of the tensor map object. - ur_exp_tensor_map_handle_t *hTensorMap); - #if !defined(__GNUC__) #pragma endregion #endif @@ -14574,49 +14318,6 @@ typedef struct ur_command_buffer_command_get_info_exp_params_t { size_t **ppPropSizeRet; } ur_command_buffer_command_get_info_exp_params_t; -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urTensorMapEncodeIm2ColExp -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_tensor_map_encode_im_2_col_exp_params_t { - ur_device_handle_t *phDevice; - ur_exp_tensor_map_data_type_flags_t *pTensorMapType; - uint32_t *pTensorRank; - void **pGlobalAddress; - const uint64_t **pGlobalDim; - const uint64_t **pGlobalStrides; - const int **pPixelBoxLowerCorner; - const int **pPixelBoxUpperCorner; - uint32_t *pChannelsPerPixel; - uint32_t *pPixelsPerColumn; - const uint32_t **pElementStrides; - ur_exp_tensor_map_interleave_flags_t *pInterleave; - ur_exp_tensor_map_swizzle_flags_t *pSwizzle; - ur_exp_tensor_map_l2_promotion_flags_t *pL2Promotion; - ur_exp_tensor_map_oob_fill_flags_t *pOobFill; - ur_exp_tensor_map_handle_t **phTensorMap; -} ur_tensor_map_encode_im_2_col_exp_params_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function parameters for urTensorMapEncodeTiledExp -/// @details Each entry is a pointer to the parameter passed to the function; -/// allowing the callback the ability to modify the parameter's value -typedef struct ur_tensor_map_encode_tiled_exp_params_t { - ur_device_handle_t *phDevice; - ur_exp_tensor_map_data_type_flags_t *pTensorMapType; - uint32_t *pTensorRank; - void **pGlobalAddress; - const uint64_t **pGlobalDim; - const uint64_t **pGlobalStrides; - const uint32_t **pBoxDim; - const uint32_t **pElementStrides; - ur_exp_tensor_map_interleave_flags_t *pInterleave; - ur_exp_tensor_map_swizzle_flags_t *pSwizzle; - ur_exp_tensor_map_l2_promotion_flags_t *pL2Promotion; - ur_exp_tensor_map_oob_fill_flags_t *pOobFill; - ur_exp_tensor_map_handle_t **phTensorMap; -} ur_tensor_map_encode_tiled_exp_params_t; - /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urUsmP2PEnablePeerAccessExp /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/include/ur_api_funcs.def b/include/ur_api_funcs.def index 280db37067..9aef728a58 100644 --- a/include/ur_api_funcs.def +++ b/include/ur_api_funcs.def @@ -189,8 +189,6 @@ _UR_API(urCommandBufferUpdateSignalEventExp) _UR_API(urCommandBufferUpdateWaitEventsExp) _UR_API(urCommandBufferGetInfoExp) _UR_API(urCommandBufferCommandGetInfoExp) -_UR_API(urTensorMapEncodeIm2ColExp) -_UR_API(urTensorMapEncodeTiledExp) _UR_API(urUsmP2PEnablePeerAccessExp) _UR_API(urUsmP2PDisablePeerAccessExp) _UR_API(urUsmP2PPeerAccessGetInfoExp) diff --git a/include/ur_ddi.h b/include/ur_ddi.h index 9cd051db18..9e27b87f17 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -1685,51 +1685,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( typedef ur_result_t(UR_APICALL *ur_pfnGetCommandBufferExpProcAddrTable_t)( ur_api_version_t, ur_command_buffer_exp_dditable_t *); -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urTensorMapEncodeIm2ColExp -typedef ur_result_t(UR_APICALL *ur_pfnTensorMapEncodeIm2ColExp_t)( - ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, - const uint64_t *, const uint64_t *, const int *, const int *, uint32_t, - uint32_t, const uint32_t *, ur_exp_tensor_map_interleave_flags_t, - ur_exp_tensor_map_swizzle_flags_t, ur_exp_tensor_map_l2_promotion_flags_t, - ur_exp_tensor_map_oob_fill_flags_t, ur_exp_tensor_map_handle_t *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urTensorMapEncodeTiledExp -typedef ur_result_t(UR_APICALL *ur_pfnTensorMapEncodeTiledExp_t)( - ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, - const uint64_t *, const uint64_t *, const uint32_t *, const uint32_t *, - ur_exp_tensor_map_interleave_flags_t, ur_exp_tensor_map_swizzle_flags_t, - ur_exp_tensor_map_l2_promotion_flags_t, ur_exp_tensor_map_oob_fill_flags_t, - ur_exp_tensor_map_handle_t *); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Table of TensorMapExp functions pointers -typedef struct ur_tensor_map_exp_dditable_t { - ur_pfnTensorMapEncodeIm2ColExp_t pfnEncodeIm2ColExp; - ur_pfnTensorMapEncodeTiledExp_t pfnEncodeTiledExp; -} ur_tensor_map_exp_dditable_t; - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Exported function for filling application's TensorMapExp table -/// with current process' addresses -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_UNINITIALIZED -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION -UR_DLLEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( - /// [in] API version requested - ur_api_version_t version, - /// [in,out] pointer to table of DDI function pointers - ur_tensor_map_exp_dditable_t *pDdiTable); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Function-pointer for urGetTensorMapExpProcAddrTable -typedef ur_result_t(UR_APICALL *ur_pfnGetTensorMapExpProcAddrTable_t)( - ur_api_version_t, ur_tensor_map_exp_dditable_t *); - /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urUsmP2PEnablePeerAccessExp typedef ur_result_t(UR_APICALL *ur_pfnUsmP2PEnablePeerAccessExp_t)( @@ -1948,7 +1903,6 @@ typedef struct ur_dditable_t { ur_usm_dditable_t USM; ur_usm_exp_dditable_t USMExp; ur_command_buffer_exp_dditable_t CommandBufferExp; - ur_tensor_map_exp_dditable_t TensorMapExp; ur_usm_p2p_exp_dditable_t UsmP2PExp; ur_virtual_mem_dditable_t VirtualMem; ur_device_dditable_t Device; diff --git a/include/ur_print.h b/include/ur_print.h index b2e16f6341..bcdd8172f8 100644 --- a/include/ur_print.h +++ b/include/ur_print.h @@ -1394,56 +1394,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpEnqueueNativeCommandProperties( const struct ur_exp_enqueue_native_command_properties_t params, char *buffer, const size_t buff_size, size_t *out_size); -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_exp_tensor_map_data_type_flag_t enum -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintExpTensorMapDataTypeFlags( - enum ur_exp_tensor_map_data_type_flag_t value, char *buffer, - const size_t buff_size, size_t *out_size); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_exp_tensor_map_interleave_flag_t enum -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintExpTensorMapInterleaveFlags( - enum ur_exp_tensor_map_interleave_flag_t value, char *buffer, - const size_t buff_size, size_t *out_size); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_exp_tensor_map_l2_promotion_flag_t enum -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintExpTensorMapL2PromotionFlags( - enum ur_exp_tensor_map_l2_promotion_flag_t value, char *buffer, - const size_t buff_size, size_t *out_size); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_exp_tensor_map_swizzle_flag_t enum -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintExpTensorMapSwizzleFlags( - enum ur_exp_tensor_map_swizzle_flag_t value, char *buffer, - const size_t buff_size, size_t *out_size); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_exp_tensor_map_oob_fill_flag_t enum -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintExpTensorMapOobFillFlags( - enum ur_exp_tensor_map_oob_fill_flag_t value, char *buffer, - const size_t buff_size, size_t *out_size); - /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_loader_config_create_params_t struct /// @returns @@ -3294,26 +3244,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintCommandBufferCommandGetInfoExpParams( const struct ur_command_buffer_command_get_info_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_tensor_map_encode_im_2_col_exp_params_t struct -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintTensorMapEncodeIm_2ColExpParams( - const struct ur_tensor_map_encode_im_2_col_exp_params_t *params, - char *buffer, const size_t buff_size, size_t *out_size); - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_tensor_map_encode_tiled_exp_params_t struct -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// - `buff_size < out_size` -UR_APIEXPORT ur_result_t UR_APICALL urPrintTensorMapEncodeTiledExpParams( - const struct ur_tensor_map_encode_tiled_exp_params_t *params, char *buffer, - const size_t buff_size, size_t *out_size); - /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_usm_p2p_enable_peer_access_exp_params_t struct /// @returns diff --git a/include/ur_print.hpp b/include/ur_print.hpp index c9be894725..c9a4d3d246 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -42,7 +42,6 @@ template <> struct is_handle : std::true_type {}; template <> struct is_handle : std::true_type {}; -template <> struct is_handle : std::true_type {}; template inline constexpr bool is_handle_v = is_handle::value; template inline ur_result_t printPtr(std::ostream &os, const T *ptr); @@ -260,27 +259,6 @@ inline ur_result_t printFlag(std::ostream &os, uint32_t flag); -template <> -inline ur_result_t -printFlag(std::ostream &os, uint32_t flag); - -template <> -inline ur_result_t -printFlag(std::ostream &os, uint32_t flag); - -template <> -inline ur_result_t -printFlag(std::ostream &os, - uint32_t flag); - -template <> -inline ur_result_t printFlag(std::ostream &os, - uint32_t flag); - -template <> -inline ur_result_t -printFlag(std::ostream &os, uint32_t flag); - } // namespace ur::details inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value); @@ -598,16 +576,6 @@ inline std::ostream &operator<<( std::ostream &os, [[maybe_unused]] const struct ur_exp_enqueue_native_command_properties_t params); -inline std::ostream &operator<<(std::ostream &os, - enum ur_exp_tensor_map_data_type_flag_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_exp_tensor_map_interleave_flag_t value); -inline std::ostream & -operator<<(std::ostream &os, enum ur_exp_tensor_map_l2_promotion_flag_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_exp_tensor_map_swizzle_flag_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_exp_tensor_map_oob_fill_flag_t value); /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_function_t type @@ -1215,12 +1183,6 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) { case UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT: os << "UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT"; break; - case UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP: - os << "UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP"; - break; - case UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP: - os << "UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP"; - break; case UR_FUNCTION_PHYSICAL_MEM_GET_INFO: os << "UR_FUNCTION_PHYSICAL_MEM_GET_INFO"; break; @@ -11597,542 +11559,6 @@ operator<<(std::ostream &os, os << "}"; return os; } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_exp_tensor_map_data_type_flag_t type -/// @returns -/// std::ostream & -inline std::ostream &operator<<(std::ostream &os, - enum ur_exp_tensor_map_data_type_flag_t value) { - switch (value) { - case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8: - os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8"; - break; - case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16: - os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16"; - break; - case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32: - os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32"; - break; - case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32: - os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32"; - break; - case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64: - os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64"; - break; - case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64: - os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64"; - break; - case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16: - os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16"; - break; - case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32: - os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32"; - break; - case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64: - os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64"; - break; - case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16: - os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16"; - break; - case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ: - os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ"; - break; - case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32: - os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32"; - break; - case UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ: - os << "UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ"; - break; - default: - os << "unknown enumerator"; - break; - } - return os; -} - -namespace ur::details { -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_exp_tensor_map_data_type_flag_t flag -template <> -inline ur_result_t -printFlag(std::ostream &os, uint32_t flag) { - uint32_t val = flag; - bool first = true; - - if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8) == - (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8; - } - - if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16) == - (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16; - } - - if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32) == - (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32; - } - - if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32) == - (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32; - } - - if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64) == - (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64; - } - - if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64) == - (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64; - } - - if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16) == - (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16; - } - - if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32) == - (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32; - } - - if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64) == - (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64; - } - - if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16) == - (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16; - } - - if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ) == - (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ; - } - - if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32) == - (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32; - } - - if ((val & UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ) == - (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ; - } - if (val != 0) { - std::bitset<32> bits(val); - if (!first) { - os << " | "; - } - os << "unknown bit flags " << bits; - } else if (first) { - os << "0"; - } - return UR_RESULT_SUCCESS; -} -} // namespace ur::details -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_exp_tensor_map_interleave_flag_t type -/// @returns -/// std::ostream & -inline std::ostream & -operator<<(std::ostream &os, enum ur_exp_tensor_map_interleave_flag_t value) { - switch (value) { - case UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE: - os << "UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE"; - break; - case UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B: - os << "UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B"; - break; - case UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B: - os << "UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B"; - break; - default: - os << "unknown enumerator"; - break; - } - return os; -} - -namespace ur::details { -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_exp_tensor_map_interleave_flag_t flag -template <> -inline ur_result_t -printFlag(std::ostream &os, - uint32_t flag) { - uint32_t val = flag; - bool first = true; - - if ((val & UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE) == - (uint32_t)UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE; - } - - if ((val & UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B) == - (uint32_t)UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B; - } - - if ((val & UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B) == - (uint32_t)UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B; - } - if (val != 0) { - std::bitset<32> bits(val); - if (!first) { - os << " | "; - } - os << "unknown bit flags " << bits; - } else if (first) { - os << "0"; - } - return UR_RESULT_SUCCESS; -} -} // namespace ur::details -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_exp_tensor_map_l2_promotion_flag_t type -/// @returns -/// std::ostream & -inline std::ostream & -operator<<(std::ostream &os, enum ur_exp_tensor_map_l2_promotion_flag_t value) { - switch (value) { - case UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE: - os << "UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE"; - break; - case UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B: - os << "UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B"; - break; - case UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B: - os << "UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B"; - break; - case UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B: - os << "UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B"; - break; - default: - os << "unknown enumerator"; - break; - } - return os; -} - -namespace ur::details { -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_exp_tensor_map_l2_promotion_flag_t flag -template <> -inline ur_result_t -printFlag(std::ostream &os, - uint32_t flag) { - uint32_t val = flag; - bool first = true; - - if ((val & UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE) == - (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE; - } - - if ((val & UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B) == - (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B; - } - - if ((val & UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B) == - (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B; - } - - if ((val & UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B) == - (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B; - } - if (val != 0) { - std::bitset<32> bits(val); - if (!first) { - os << " | "; - } - os << "unknown bit flags " << bits; - } else if (first) { - os << "0"; - } - return UR_RESULT_SUCCESS; -} -} // namespace ur::details -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_exp_tensor_map_swizzle_flag_t type -/// @returns -/// std::ostream & -inline std::ostream &operator<<(std::ostream &os, - enum ur_exp_tensor_map_swizzle_flag_t value) { - switch (value) { - case UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE: - os << "UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE"; - break; - case UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B: - os << "UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B"; - break; - case UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B: - os << "UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B"; - break; - case UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B: - os << "UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B"; - break; - default: - os << "unknown enumerator"; - break; - } - return os; -} - -namespace ur::details { -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_exp_tensor_map_swizzle_flag_t flag -template <> -inline ur_result_t printFlag(std::ostream &os, - uint32_t flag) { - uint32_t val = flag; - bool first = true; - - if ((val & UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE) == - (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE; - } - - if ((val & UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B) == - (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B; - } - - if ((val & UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B) == - (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B; - } - - if ((val & UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B) == - (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B; - } - if (val != 0) { - std::bitset<32> bits(val); - if (!first) { - os << " | "; - } - os << "unknown bit flags " << bits; - } else if (first) { - os << "0"; - } - return UR_RESULT_SUCCESS; -} -} // namespace ur::details -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_exp_tensor_map_oob_fill_flag_t type -/// @returns -/// std::ostream & -inline std::ostream &operator<<(std::ostream &os, - enum ur_exp_tensor_map_oob_fill_flag_t value) { - switch (value) { - case UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE: - os << "UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE"; - break; - case UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA: - os << "UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA"; - break; - default: - os << "unknown enumerator"; - break; - } - return os; -} - -namespace ur::details { -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print ur_exp_tensor_map_oob_fill_flag_t flag -template <> -inline ur_result_t -printFlag(std::ostream &os, uint32_t flag) { - uint32_t val = flag; - bool first = true; - - if ((val & UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE) == - (uint32_t)UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE; - } - - if ((val & UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA) == - (uint32_t)UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA) { - val ^= (uint32_t)UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA; - } - if (val != 0) { - std::bitset<32> bits(val); - if (!first) { - os << " | "; - } - os << "unknown bit flags " << bits; - } else if (first) { - os << "0"; - } - return UR_RESULT_SUCCESS; -} -} // namespace ur::details /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_loader_config_create_params_t type @@ -19371,184 +18797,6 @@ operator<<(std::ostream &os, [[maybe_unused]] const struct return os; } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_tensor_map_encode_im_2_col_exp_params_t -/// type -/// @returns -/// std::ostream & -inline std::ostream &operator<<( - std::ostream &os, - [[maybe_unused]] const struct ur_tensor_map_encode_im_2_col_exp_params_t - *params) { - - os << ".hDevice = "; - - ur::details::printPtr(os, *(params->phDevice)); - - os << ", "; - os << ".TensorMapType = "; - - ur::details::printFlag( - os, *(params->pTensorMapType)); - - os << ", "; - os << ".TensorRank = "; - - os << *(params->pTensorRank); - - os << ", "; - os << ".GlobalAddress = "; - - ur::details::printPtr(os, *(params->pGlobalAddress)); - - os << ", "; - os << ".GlobalDim = "; - - ur::details::printPtr(os, *(params->pGlobalDim)); - - os << ", "; - os << ".GlobalStrides = "; - - ur::details::printPtr(os, *(params->pGlobalStrides)); - - os << ", "; - os << ".PixelBoxLowerCorner = "; - - ur::details::printPtr(os, *(params->pPixelBoxLowerCorner)); - - os << ", "; - os << ".PixelBoxUpperCorner = "; - - ur::details::printPtr(os, *(params->pPixelBoxUpperCorner)); - - os << ", "; - os << ".ChannelsPerPixel = "; - - os << *(params->pChannelsPerPixel); - - os << ", "; - os << ".PixelsPerColumn = "; - - os << *(params->pPixelsPerColumn); - - os << ", "; - os << ".ElementStrides = "; - - ur::details::printPtr(os, *(params->pElementStrides)); - - os << ", "; - os << ".Interleave = "; - - ur::details::printFlag( - os, *(params->pInterleave)); - - os << ", "; - os << ".Swizzle = "; - - ur::details::printFlag(os, - *(params->pSwizzle)); - - os << ", "; - os << ".L2Promotion = "; - - ur::details::printFlag( - os, *(params->pL2Promotion)); - - os << ", "; - os << ".OobFill = "; - - ur::details::printFlag( - os, *(params->pOobFill)); - - os << ", "; - os << ".hTensorMap = "; - - ur::details::printPtr(os, *(params->phTensorMap)); - - return os; -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Print operator for the ur_tensor_map_encode_tiled_exp_params_t type -/// @returns -/// std::ostream & -inline std::ostream & -operator<<(std::ostream &os, - [[maybe_unused]] const struct ur_tensor_map_encode_tiled_exp_params_t - *params) { - - os << ".hDevice = "; - - ur::details::printPtr(os, *(params->phDevice)); - - os << ", "; - os << ".TensorMapType = "; - - ur::details::printFlag( - os, *(params->pTensorMapType)); - - os << ", "; - os << ".TensorRank = "; - - os << *(params->pTensorRank); - - os << ", "; - os << ".GlobalAddress = "; - - ur::details::printPtr(os, *(params->pGlobalAddress)); - - os << ", "; - os << ".GlobalDim = "; - - ur::details::printPtr(os, *(params->pGlobalDim)); - - os << ", "; - os << ".GlobalStrides = "; - - ur::details::printPtr(os, *(params->pGlobalStrides)); - - os << ", "; - os << ".BoxDim = "; - - ur::details::printPtr(os, *(params->pBoxDim)); - - os << ", "; - os << ".ElementStrides = "; - - ur::details::printPtr(os, *(params->pElementStrides)); - - os << ", "; - os << ".Interleave = "; - - ur::details::printFlag( - os, *(params->pInterleave)); - - os << ", "; - os << ".Swizzle = "; - - ur::details::printFlag(os, - *(params->pSwizzle)); - - os << ", "; - os << ".L2Promotion = "; - - ur::details::printFlag( - os, *(params->pL2Promotion)); - - os << ", "; - os << ".OobFill = "; - - ur::details::printFlag( - os, *(params->pOobFill)); - - os << ", "; - os << ".hTensorMap = "; - - ur::details::printPtr(os, *(params->phTensorMap)); - - return os; -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_usm_p2p_enable_peer_access_exp_params_t /// type @@ -20844,12 +20092,6 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, os << (const struct ur_command_buffer_command_get_info_exp_params_t *) params; } break; - case UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP: { - os << (const struct ur_tensor_map_encode_im_2_col_exp_params_t *)params; - } break; - case UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP: { - os << (const struct ur_tensor_map_encode_tiled_exp_params_t *)params; - } break; case UR_FUNCTION_USM_P2P_ENABLE_PEER_ACCESS_EXP: { os << (const struct ur_usm_p2p_enable_peer_access_exp_params_t *)params; } break; diff --git a/scripts/core/EXP-TENSOR-MAP.rst b/scripts/core/EXP-TENSOR-MAP.rst deleted file mode 100644 index 15a6802363..0000000000 --- a/scripts/core/EXP-TENSOR-MAP.rst +++ /dev/null @@ -1,75 +0,0 @@ -<% - OneApi=tags['$OneApi'] - x=tags['$x'] - X=x.upper() -%> - -.. _experimental-enqueue-native-command: - -================================================================================ -Tensor Mapping APIs -================================================================================ - -.. warning:: - - Experimental features: - - * May be replaced, updated, or removed at any time. - * Do not require maintaining API/ABI stability of their own additions over - time. - * Do not require conformance testing of their own additions. - - -Motivation --------------------------------------------------------------------------------- - -Used to target the CUDA entry points ``cuTensorMapEncodeIm2col`` and -``cuTensorMapEncodeTiled``. - -For some tensor core operations on ``sm_90+`` Nvidia devices, a tensor -descriptor must be built on the host and passed to the kernel. The interfaces -mentioned above, and mapped to UR in this extension, provide the APIs necessary -to create these tensor descriptor objects, that can then be passed to the -kernels. - -API --------------------------------------------------------------------------------- - -Enums -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -${x}_exp_tensor_map_data_type_flags_t -${x}_exp_tensor_map_interleave_flags_t -${x}_exp_tensor_map_l2_promotion_flags_t -${x}_exp_tensor_map_swizzle_flags_t -${x}_exp_tensor_map_oob_fill_flags_t - -Types -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -${x}_exp_tensor_map_handle_t - -Functions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -* ${x}TensorMapEncodeIm2ColExp -* ${x}TensorMapEncodeTiledExp - -Changelog --------------------------------------------------------------------------------- - -+-----------+------------------------+ -| Revision | Changes | -+===========+========================+ -| 1.0 | Initial Draft | -+-----------+------------------------+ - - -Support --------------------------------------------------------------------------------- - -This extension is only supported on the ``UR_PLATFORM_BACKEND_CUDA`` backend. - -Contributors --------------------------------------------------------------------------------- - -* Hugh Delaney `hugh.delaney@codeplay.com `_ diff --git a/scripts/core/exp-tensor-map.yml b/scripts/core/exp-tensor-map.yml deleted file mode 100644 index fa1e8c1898..0000000000 --- a/scripts/core/exp-tensor-map.yml +++ /dev/null @@ -1,213 +0,0 @@ -# -# Copyright (C) 2024 Intel Corporation -# -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# See YaML.md for syntax definition -# ---- #-------------------------------------------------------------------------- -type: header -desc: "Intel $OneApi Unified Runtime Experimental API for mapping tensor objects" -ordinal: "100" - ---- #-------------------------------------------------------------------------- -type: handle -desc: "Handle of tensor map object" -class: $xTensorMap -name: "$x_exp_tensor_map_handle_t" - ---- #-------------------------------------------------------------------------- -type: enum -desc: "Tensor map data type" -name: $x_exp_tensor_map_data_type_flags_t -etors: - - name: UINT8 - desc: "1 byte" - - name: UINT16 - desc: "2 bytes" - - name: UINT32 - desc: "4 bytes" - - name: INT32 - desc: "4 bytes" - - name: UINT64 - desc: "8 bytes" - - name: INT64 - desc: "8 bytes" - - name: FLOAT16 - desc: "2 bytes" - - name: FLOAT32 - desc: "4 bytes" - - name: FLOAT64 - desc: "8 bytes" - - name: BFLOAT16 - desc: "2 bytes" - - name: FLOAT32_FTZ - desc: "4 bytes" - - name: TFLOAT32 - desc: "4 bytes" - - name: TFLOAT32_FTZ - desc: "4 bytes" - ---- #-------------------------------------------------------------------------- -type: enum -desc: "Tensor map interleave" -name: $x_exp_tensor_map_interleave_flags_t -etors: - - name: NONE - desc: "No interleave" - - name: 16B - desc: "16B interleave" - - name: 32B - desc: "32B interleave" - ---- #-------------------------------------------------------------------------- -type: enum -desc: "Tensor map l2 promotion" -name: $x_exp_tensor_map_l2_promotion_flags_t -etors: - - name: NONE - desc: "No promotion type" - - name: 64B - desc: "64B promotion type" - - name: 128B - desc: "128B promotion type" - - name: 256B - desc: "256B promotion type" - ---- #-------------------------------------------------------------------------- -type: enum -desc: "Tensor map swizzle" -name: $x_exp_tensor_map_swizzle_flags_t -etors: - - name: NONE - desc: "No swizzle" - - name: 32B - desc: "32B swizzle" - - name: 64B - desc: "64B swizzle" - - name: 128B - desc: "128B swizzle" - ---- #-------------------------------------------------------------------------- -type: enum -desc: "Tensor map OOB fill" -name: $x_exp_tensor_map_oob_fill_flags_t -etors: - - name: NONE - desc: "No OOB fill" - - name: REQUEST_ZERO_FMA - desc: "Refer to NVIDIA docs" - ---- #-------------------------------------------------------------------------- -type: function -desc: "Encode tensor map with image data" -class: $xTensorMap -name: EncodeIm2ColExp -details: - - "Map encode using im2col." -params: - - type: $x_device_handle_t - name: hDevice - desc: "[in] Handle of the device object." - - type: $x_exp_tensor_map_data_type_flags_t - name: TensorMapType - desc: "[in] Data type of the tensor object." - - type: uint32_t - name: TensorRank - desc: "[in] Dimensionality of tensor; must be at least 3." - - type: void* - name: GlobalAddress - desc: "[in] Starting address of memory region described by tensor." - - type: const uint64_t* - name: GlobalDim - desc: "[in] Array containing tensor size (number of elements) along each of the TensorRank dimensions." - - type: const uint64_t* - name: GlobalStrides - desc: "[in] Array containing stride size (in bytes) along each of the TensorRank - 1 dimensions." - - type: const int* - name: PixelBoxLowerCorner - desc: "[in] Array containing DHW dimensions of lower box corner." - - type: const int* - name: PixelBoxUpperCorner - desc: "[in] Array containing DHW dimensions of upper box corner." - - type: uint32_t - name: ChannelsPerPixel - desc: "[in] Number of channels per pixel." - - type: uint32_t - name: PixelsPerColumn - desc: "[in] Number of pixels per column." - - type: const uint32_t* - name: ElementStrides - desc: "[in] Array containing traversal stride in each of the TensorRank dimensions." - - type: $x_exp_tensor_map_interleave_flags_t - name: Interleave - desc: "[in] Type of interleaved layout the tensor addresses" - - type: $x_exp_tensor_map_swizzle_flags_t - name: Swizzle - desc: "[in] Bank swizzling pattern inside shared memory" - - type: $x_exp_tensor_map_l2_promotion_flags_t - name: L2Promotion - desc: "[in] L2 promotion size." - - type: $x_exp_tensor_map_oob_fill_flags_t - name: OobFill - desc: "[in] Indicates whether zero or special NaN constant will be used to fill out-of-bounds elements." - - type: $x_exp_tensor_map_handle_t* - name: hTensorMap - desc: "[out] Handle of the tensor map object." -returns: - - $X_RESULT_ERROR_INVALID_ARGUMENT: - - "`TensorRank < 3`" - ---- #-------------------------------------------------------------------------- -type: function -desc: "Encode tensor map with tiled data" -class: $xTensorMap -name: EncodeTiledExp -details: - - "Tiled map encode." -params: - - type: $x_device_handle_t - name: hDevice - desc: "[in] Handle of the device object." - - type: $x_exp_tensor_map_data_type_flags_t - name: TensorMapType - desc: "[in] Data type of the tensor object." - - type: uint32_t - name: TensorRank - desc: "[in] Dimensionality of tensor; must be at least 3." - - type: void* - name: GlobalAddress - desc: "[in] Starting address of memory region described by tensor." - - type: const uint64_t* - name: GlobalDim - desc: "[in] Array containing tensor size (number of elements) along each of the TensorRank dimensions." - - type: const uint64_t* - name: GlobalStrides - desc: "[in] Array containing stride size (in bytes) along each of the TensorRank - 1 dimensions." - - type: const uint32_t* - name: BoxDim - desc: "[in] Array containing traversal box size (number of elments) along each of the TensorRank dimensions. Specifies how many elements to be traversed along each tensor dimension." - - type: const uint32_t* - name: ElementStrides - desc: "[in] Array containing traversal stride in each of the TensorRank dimensions." - - type: $x_exp_tensor_map_interleave_flags_t - name: Interleave - desc: "[in] Type of interleaved layout the tensor addresses" - - type: $x_exp_tensor_map_swizzle_flags_t - name: Swizzle - desc: "[in] Bank swizzling pattern inside shared memory" - - type: $x_exp_tensor_map_l2_promotion_flags_t - name: L2Promotion - desc: "[in] L2 promotion size." - - type: $x_exp_tensor_map_oob_fill_flags_t - name: OobFill - desc: "[in] Indicates whether zero or special NaN constant will be used to fill out-of-bounds elements." - - type: $x_exp_tensor_map_handle_t* - name: hTensorMap - desc: "[out] Handle of the tensor map object." -returns: - - $X_RESULT_ERROR_INVALID_ARGUMENT: - - "`TensorRank < 3`" - diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index 2ae34fb0b4..379ef87b35 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -607,12 +607,6 @@ etors: - name: ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT desc: Enumerator for $xEnqueueEventsWaitWithBarrierExt value: '246' -- name: TENSOR_MAP_ENCODE_IM_2_COL_EXP - desc: Enumerator for $xTensorMapEncodeIm2ColExp - value: '247' -- name: TENSOR_MAP_ENCODE_TILED_EXP - desc: Enumerator for $xTensorMapEncodeTiledExp - value: '248' - name: PHYSICAL_MEM_GET_INFO desc: Enumerator for $xPhysicalMemGetInfo value: '249' diff --git a/source/adapters/adapter.def.in b/source/adapters/adapter.def.in index fd37178966..3c18c78bd1 100644 --- a/source/adapters/adapter.def.in +++ b/source/adapters/adapter.def.in @@ -16,7 +16,6 @@ EXPORTS urGetProgramExpProcAddrTable urGetQueueProcAddrTable urGetSamplerProcAddrTable - urGetTensorMapExpProcAddrTable urGetUSMProcAddrTable urGetUSMExpProcAddrTable urGetUsmP2PExpProcAddrTable diff --git a/source/adapters/adapter.map.in b/source/adapters/adapter.map.in index 50db54ef40..bb08ae7d88 100644 --- a/source/adapters/adapter.map.in +++ b/source/adapters/adapter.map.in @@ -16,7 +16,6 @@ urGetProgramExpProcAddrTable; urGetQueueProcAddrTable; urGetSamplerProcAddrTable; - urGetTensorMapExpProcAddrTable; urGetUSMProcAddrTable; urGetUSMExpProcAddrTable; urGetUsmP2PExpProcAddrTable; diff --git a/source/adapters/cuda/CMakeLists.txt b/source/adapters/cuda/CMakeLists.txt index 3d0418fd07..b6b153a5d8 100644 --- a/source/adapters/cuda/CMakeLists.txt +++ b/source/adapters/cuda/CMakeLists.txt @@ -38,7 +38,6 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.hpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/tensor_map.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tracing.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp diff --git a/source/adapters/cuda/command_buffer.hpp b/source/adapters/cuda/command_buffer.hpp index 0c27241548..9b40088f75 100644 --- a/source/adapters/cuda/command_buffer.hpp +++ b/source/adapters/cuda/command_buffer.hpp @@ -301,7 +301,7 @@ struct ur_exp_command_buffer_handle_t_ { ur_exp_command_buffer_handle_t_(ur_context_handle_t Context, ur_device_handle_t Device, bool IsUpdatable); - virtual ~ur_exp_command_buffer_handle_t_(); + ~ur_exp_command_buffer_handle_t_(); void registerSyncPoint(ur_exp_command_buffer_sync_point_t SyncPoint, CUgraphNode CuNode) { diff --git a/source/adapters/cuda/tensor_map.cpp b/source/adapters/cuda/tensor_map.cpp deleted file mode 100644 index 141388a0e4..0000000000 --- a/source/adapters/cuda/tensor_map.cpp +++ /dev/null @@ -1,161 +0,0 @@ -//===--------- tensor_map.cpp - CUDA Adapter ------------------------------===// -// -// Copyright (C) 2024 Intel Corporation -// -// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM -// Exceptions. See LICENSE.TXT -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include - -#include "context.hpp" - -#if CUDA_VERSION < 12000 -UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( - ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, - const uint64_t *, const uint64_t *, const int *, const int *, uint32_t, - uint32_t, const uint32_t *, ur_exp_tensor_map_interleave_flags_t, - ur_exp_tensor_map_swizzle_flags_t, ur_exp_tensor_map_l2_promotion_flags_t, - ur_exp_tensor_map_oob_fill_flags_t, ur_exp_tensor_map_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} -UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp( - ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, - const uint64_t *, const uint64_t *, const uint32_t *, const uint32_t *, - ur_exp_tensor_map_interleave_flags_t, ur_exp_tensor_map_swizzle_flags_t, - ur_exp_tensor_map_l2_promotion_flags_t, ur_exp_tensor_map_oob_fill_flags_t, - ur_exp_tensor_map_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} -#else -struct ur_exp_tensor_map_handle_t_ { - CUtensorMap Map; -}; - -#define CONVERT(URTYPE, CUTYPE) \ - if ((URTYPE) & UrType) \ - return (CUTYPE); - -inline CUtensorMapDataType -convertUrToCuDataType(ur_exp_tensor_map_data_type_flags_t UrType) { - CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8, - CU_TENSOR_MAP_DATA_TYPE_UINT8); - CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16, - CU_TENSOR_MAP_DATA_TYPE_UINT16); - CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32, - CU_TENSOR_MAP_DATA_TYPE_UINT32); - CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32, - CU_TENSOR_MAP_DATA_TYPE_INT32); - CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64, - CU_TENSOR_MAP_DATA_TYPE_UINT64); - CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64, - CU_TENSOR_MAP_DATA_TYPE_INT64); - CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16, - CU_TENSOR_MAP_DATA_TYPE_FLOAT16); - CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32, - CU_TENSOR_MAP_DATA_TYPE_FLOAT32); - CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64, - CU_TENSOR_MAP_DATA_TYPE_FLOAT64); - CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16, - CU_TENSOR_MAP_DATA_TYPE_BFLOAT16); - CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ, - CU_TENSOR_MAP_DATA_TYPE_FLOAT32_FTZ); - CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32, - CU_TENSOR_MAP_DATA_TYPE_TFLOAT32); - CONVERT(UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ, - CU_TENSOR_MAP_DATA_TYPE_TFLOAT32_FTZ); - throw "convertUrToCuDataType failed!"; -} - -CUtensorMapInterleave -convertUrToCuInterleave(ur_exp_tensor_map_interleave_flags_t UrType) { - CONVERT(UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE, - CU_TENSOR_MAP_INTERLEAVE_NONE); - CONVERT(UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B, CU_TENSOR_MAP_INTERLEAVE_16B); - CONVERT(UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B, CU_TENSOR_MAP_INTERLEAVE_32B); - throw "convertUrToCuInterleave failed!"; -} - -CUtensorMapSwizzle -convertUrToCuSwizzle(ur_exp_tensor_map_swizzle_flags_t UrType) { - CONVERT(UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE, CU_TENSOR_MAP_SWIZZLE_NONE); - CONVERT(UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B, CU_TENSOR_MAP_SWIZZLE_32B); - CONVERT(UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B, CU_TENSOR_MAP_SWIZZLE_64B); - CONVERT(UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B, CU_TENSOR_MAP_SWIZZLE_128B); - throw "convertUrToCuSwizzle failed!"; -} - -CUtensorMapL2promotion -convertUrToCuL2Promotion(ur_exp_tensor_map_l2_promotion_flags_t UrType) { - CONVERT(UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE, - CU_TENSOR_MAP_L2_PROMOTION_NONE); - CONVERT(UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B, - CU_TENSOR_MAP_L2_PROMOTION_L2_64B); - CONVERT(UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B, - CU_TENSOR_MAP_L2_PROMOTION_L2_128B); - CONVERT(UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B, - CU_TENSOR_MAP_L2_PROMOTION_L2_256B); - throw "convertUrToCul2promotion failed!"; -} - -CUtensorMapFloatOOBfill -convertUrToCuOobFill(ur_exp_tensor_map_oob_fill_flags_t UrType) { - CONVERT(UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE, - CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE); - CONVERT(UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA, - CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA); - throw "convertUrToCuDataOOBfill failed!"; -} - -UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( - ur_device_handle_t hDevice, - ur_exp_tensor_map_data_type_flags_t TensorMapType, uint32_t TensorRank, - void *GlobalAddress, const uint64_t *GlobalDim, - const uint64_t *GlobalStrides, const int *PixelBoxLowerCorner, - const int *PixelBoxUpperCorner, uint32_t ChannelsPerPixel, - uint32_t PixelsPerColumn, const uint32_t *ElementStrides, - ur_exp_tensor_map_interleave_flags_t Interleave, - ur_exp_tensor_map_swizzle_flags_t Swizzle, - ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, - ur_exp_tensor_map_oob_fill_flags_t OobFill, - ur_exp_tensor_map_handle_t *hTensorMap) { - ScopedContext Active(hDevice); - try { - UR_CHECK_ERROR(cuTensorMapEncodeIm2col( - &(*hTensorMap)->Map, convertUrToCuDataType(TensorMapType), TensorRank, - GlobalAddress, GlobalDim, GlobalStrides, PixelBoxLowerCorner, - PixelBoxUpperCorner, ChannelsPerPixel, PixelsPerColumn, ElementStrides, - convertUrToCuInterleave(Interleave), convertUrToCuSwizzle(Swizzle), - convertUrToCuL2Promotion(L2Promotion), convertUrToCuOobFill(OobFill))); - } catch (ur_result_t Err) { - return Err; - } - return UR_RESULT_SUCCESS; -} -UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp( - ur_device_handle_t hDevice, - ur_exp_tensor_map_data_type_flags_t TensorMapType, uint32_t TensorRank, - void *GlobalAddress, const uint64_t *GlobalDim, - const uint64_t *GlobalStrides, const uint32_t *BoxDim, - const uint32_t *ElementStrides, - ur_exp_tensor_map_interleave_flags_t Interleave, - ur_exp_tensor_map_swizzle_flags_t Swizzle, - ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, - ur_exp_tensor_map_oob_fill_flags_t OobFill, - ur_exp_tensor_map_handle_t *hTensorMap) { - ScopedContext Active(hDevice); - try { - UR_CHECK_ERROR(cuTensorMapEncodeTiled( - &(*hTensorMap)->Map, convertUrToCuDataType(TensorMapType), TensorRank, - GlobalAddress, GlobalDim, GlobalStrides, BoxDim, ElementStrides, - convertUrToCuInterleave(Interleave), convertUrToCuSwizzle(Swizzle), - convertUrToCuL2Promotion(L2Promotion), convertUrToCuOobFill(OobFill))); - } catch (ur_result_t Err) { - return Err; - } - return UR_RESULT_SUCCESS; -} -#endif diff --git a/source/adapters/cuda/ur_interface_loader.cpp b/source/adapters/cuda/ur_interface_loader.cpp index d3fea1aee2..b373570d38 100644 --- a/source/adapters/cuda/ur_interface_loader.cpp +++ b/source/adapters/cuda/ur_interface_loader.cpp @@ -435,19 +435,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( - ur_api_version_t version, ur_tensor_map_exp_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - - pDdiTable->pfnEncodeIm2ColExp = urTensorMapEncodeIm2ColExp; - pDdiTable->pfnEncodeTiledExp = urTensorMapEncodeTiledExp; - - return result; -} - UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); diff --git a/source/adapters/hip/CMakeLists.txt b/source/adapters/hip/CMakeLists.txt index 36222907c6..9113d7b1ca 100644 --- a/source/adapters/hip/CMakeLists.txt +++ b/source/adapters/hip/CMakeLists.txt @@ -86,7 +86,6 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.hpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/tensor_map.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp ${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp diff --git a/source/adapters/hip/tensor_map.cpp b/source/adapters/hip/tensor_map.cpp deleted file mode 100644 index 348c4c9d05..0000000000 --- a/source/adapters/hip/tensor_map.cpp +++ /dev/null @@ -1,28 +0,0 @@ -//===--------- tensor_map.cpp - HIP Adapter -------------------------------===// -// -// Copyright (C) 2024 Intel Corporation -// -// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM -// Exceptions. See LICENSE.TXT -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( - ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, - const uint64_t *, const uint64_t *, const int *, const int *, uint32_t, - uint32_t, const uint32_t *, ur_exp_tensor_map_interleave_flags_t, - ur_exp_tensor_map_swizzle_flags_t, ur_exp_tensor_map_l2_promotion_flags_t, - ur_exp_tensor_map_oob_fill_flags_t, ur_exp_tensor_map_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} -UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp( - ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, - const uint64_t *, const uint64_t *, const uint32_t *, const uint32_t *, - ur_exp_tensor_map_interleave_flags_t, ur_exp_tensor_map_swizzle_flags_t, - ur_exp_tensor_map_l2_promotion_flags_t, ur_exp_tensor_map_oob_fill_flags_t, - ur_exp_tensor_map_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} diff --git a/source/adapters/hip/ur_interface_loader.cpp b/source/adapters/hip/ur_interface_loader.cpp index 5e5e6ade84..413fa385d0 100644 --- a/source/adapters/hip/ur_interface_loader.cpp +++ b/source/adapters/hip/ur_interface_loader.cpp @@ -401,19 +401,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( - ur_api_version_t version, ur_tensor_map_exp_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - - pDdiTable->pfnEncodeIm2ColExp = urTensorMapEncodeIm2ColExp; - pDdiTable->pfnEncodeTiledExp = urTensorMapEncodeTiledExp; - - return result; -} - UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index c75c870be7..5bb5cf67fb 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -45,7 +45,6 @@ if(UR_BUILD_ADAPTER_L0) ${CMAKE_CURRENT_SOURCE_DIR}/program.cpp ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/tensor_map.cpp ${CMAKE_CURRENT_SOURCE_DIR}/image.cpp ${CMAKE_CURRENT_SOURCE_DIR}/helpers/kernel_helpers.cpp ${CMAKE_CURRENT_SOURCE_DIR}/helpers/memory_helpers.cpp @@ -143,7 +142,6 @@ if(UR_BUILD_ADAPTER_L0_V2) ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp ${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/tensor_map.cpp # v2-only sources ${CMAKE_CURRENT_SOURCE_DIR}/v2/command_buffer.hpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.hpp diff --git a/source/adapters/level_zero/tensor_map.cpp b/source/adapters/level_zero/tensor_map.cpp deleted file mode 100644 index 91d6498540..0000000000 --- a/source/adapters/level_zero/tensor_map.cpp +++ /dev/null @@ -1,32 +0,0 @@ -//===--------- tensor_map.cpp - L0 Adapter --------------------------------===// -// -// Copyright (C) 2024 Intel Corporation -// -// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM -// Exceptions. See LICENSE.TXT -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -namespace ur::level_zero { - -ur_result_t urTensorMapEncodeIm2ColExp( - ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, - const uint64_t *, const uint64_t *, const int *, const int *, uint32_t, - uint32_t, const uint32_t *, ur_exp_tensor_map_interleave_flags_t, - ur_exp_tensor_map_swizzle_flags_t, ur_exp_tensor_map_l2_promotion_flags_t, - ur_exp_tensor_map_oob_fill_flags_t, ur_exp_tensor_map_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t urTensorMapEncodeTiledExp( - ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, - const uint64_t *, const uint64_t *, const uint32_t *, const uint32_t *, - ur_exp_tensor_map_interleave_flags_t, ur_exp_tensor_map_swizzle_flags_t, - ur_exp_tensor_map_l2_promotion_flags_t, ur_exp_tensor_map_oob_fill_flags_t, - ur_exp_tensor_map_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} -} // namespace ur::level_zero diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 739a3f3ee4..c938ff1f95 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -425,19 +425,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( return result; } -UR_APIEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( - ur_api_version_t version, ur_tensor_map_exp_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - - pDdiTable->pfnEncodeIm2ColExp = ur::level_zero::urTensorMapEncodeIm2ColExp; - pDdiTable->pfnEncodeTiledExp = ur::level_zero::urTensorMapEncodeTiledExp; - - return result; -} - UR_APIEXPORT ur_result_t UR_APICALL urGetUSMProcAddrTable(ur_api_version_t version, ur_usm_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); @@ -609,10 +596,6 @@ ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi) { &ddi->Sampler); if (result != UR_RESULT_SUCCESS) return result; - result = ur::level_zero::urGetTensorMapExpProcAddrTable( - UR_API_VERSION_CURRENT, &ddi->TensorMapExp); - if (result != UR_RESULT_SUCCESS) - return result; result = ur::level_zero::urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &ddi->USM); if (result != UR_RESULT_SUCCESS) diff --git a/source/adapters/level_zero/ur_interface_loader.hpp b/source/adapters/level_zero/ur_interface_loader.hpp index 0814427837..8620eeea81 100644 --- a/source/adapters/level_zero/ur_interface_loader.hpp +++ b/source/adapters/level_zero/ur_interface_loader.hpp @@ -741,30 +741,6 @@ ur_result_t urEnqueueNativeCommandExp( const ur_exp_enqueue_native_command_properties_t *pProperties, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); -ur_result_t urTensorMapEncodeIm2ColExp( - ur_device_handle_t hDevice, - ur_exp_tensor_map_data_type_flags_t TensorMapType, uint32_t TensorRank, - void *GlobalAddress, const uint64_t *GlobalDim, - const uint64_t *GlobalStrides, const int *PixelBoxLowerCorner, - const int *PixelBoxUpperCorner, uint32_t ChannelsPerPixel, - uint32_t PixelsPerColumn, const uint32_t *ElementStrides, - ur_exp_tensor_map_interleave_flags_t Interleave, - ur_exp_tensor_map_swizzle_flags_t Swizzle, - ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, - ur_exp_tensor_map_oob_fill_flags_t OobFill, - ur_exp_tensor_map_handle_t *hTensorMap); -ur_result_t -urTensorMapEncodeTiledExp(ur_device_handle_t hDevice, - ur_exp_tensor_map_data_type_flags_t TensorMapType, - uint32_t TensorRank, void *GlobalAddress, - const uint64_t *GlobalDim, - const uint64_t *GlobalStrides, const uint32_t *BoxDim, - const uint32_t *ElementStrides, - ur_exp_tensor_map_interleave_flags_t Interleave, - ur_exp_tensor_map_swizzle_flags_t Swizzle, - ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, - ur_exp_tensor_map_oob_fill_flags_t OobFill, - ur_exp_tensor_map_handle_t *hTensorMap); #ifdef UR_STATIC_ADAPTER_LEVEL_ZERO ur_result_t urAdapterGetDdiTables(ur_dditable_t *ddi); #endif diff --git a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index ad9fbfd1fb..bb73c1c172 100644 --- a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -11015,175 +11015,6 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( return exceptionToResult(std::current_exception()); } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urTensorMapEncodeIm2ColExp -__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( - /// [in] Handle of the device object. - ur_device_handle_t hDevice, - /// [in] Data type of the tensor object. - ur_exp_tensor_map_data_type_flags_t TensorMapType, - /// [in] Dimensionality of tensor; must be at least 3. - uint32_t TensorRank, - /// [in] Starting address of memory region described by tensor. - void *GlobalAddress, - /// [in] Array containing tensor size (number of elements) along each of - /// the TensorRank dimensions. - const uint64_t *GlobalDim, - /// [in] Array containing stride size (in bytes) along each of the - /// TensorRank - 1 dimensions. - const uint64_t *GlobalStrides, - /// [in] Array containing DHW dimensions of lower box corner. - const int *PixelBoxLowerCorner, - /// [in] Array containing DHW dimensions of upper box corner. - const int *PixelBoxUpperCorner, - /// [in] Number of channels per pixel. - uint32_t ChannelsPerPixel, - /// [in] Number of pixels per column. - uint32_t PixelsPerColumn, - /// [in] Array containing traversal stride in each of the TensorRank - /// dimensions. - const uint32_t *ElementStrides, - /// [in] Type of interleaved layout the tensor addresses - ur_exp_tensor_map_interleave_flags_t Interleave, - /// [in] Bank swizzling pattern inside shared memory - ur_exp_tensor_map_swizzle_flags_t Swizzle, - /// [in] L2 promotion size. - ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, - /// [in] Indicates whether zero or special NaN constant will be used to - /// fill out-of-bounds elements. - ur_exp_tensor_map_oob_fill_flags_t OobFill, - /// [out] Handle of the tensor map object. - ur_exp_tensor_map_handle_t *hTensorMap) try { - ur_result_t result = UR_RESULT_SUCCESS; - - ur_tensor_map_encode_im_2_col_exp_params_t params = {&hDevice, - &TensorMapType, - &TensorRank, - &GlobalAddress, - &GlobalDim, - &GlobalStrides, - &PixelBoxLowerCorner, - &PixelBoxUpperCorner, - &ChannelsPerPixel, - &PixelsPerColumn, - &ElementStrides, - &Interleave, - &Swizzle, - &L2Promotion, - &OobFill, - &hTensorMap}; - - auto beforeCallback = reinterpret_cast( - mock::getCallbacks().get_before_callback("urTensorMapEncodeIm2ColExp")); - if (beforeCallback) { - result = beforeCallback(¶ms); - if (result != UR_RESULT_SUCCESS) { - return result; - } - } - - auto replaceCallback = reinterpret_cast( - mock::getCallbacks().get_replace_callback("urTensorMapEncodeIm2ColExp")); - if (replaceCallback) { - result = replaceCallback(¶ms); - } else { - - *hTensorMap = mock::createDummyHandle(); - result = UR_RESULT_SUCCESS; - } - - if (result != UR_RESULT_SUCCESS) { - return result; - } - - auto afterCallback = reinterpret_cast( - mock::getCallbacks().get_after_callback("urTensorMapEncodeIm2ColExp")); - if (afterCallback) { - return afterCallback(¶ms); - } - - return result; -} catch (...) { - return exceptionToResult(std::current_exception()); -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urTensorMapEncodeTiledExp -__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( - /// [in] Handle of the device object. - ur_device_handle_t hDevice, - /// [in] Data type of the tensor object. - ur_exp_tensor_map_data_type_flags_t TensorMapType, - /// [in] Dimensionality of tensor; must be at least 3. - uint32_t TensorRank, - /// [in] Starting address of memory region described by tensor. - void *GlobalAddress, - /// [in] Array containing tensor size (number of elements) along each of - /// the TensorRank dimensions. - const uint64_t *GlobalDim, - /// [in] Array containing stride size (in bytes) along each of the - /// TensorRank - 1 dimensions. - const uint64_t *GlobalStrides, - /// [in] Array containing traversal box size (number of elments) along - /// each of the TensorRank dimensions. Specifies how many elements to be - /// traversed along each tensor dimension. - const uint32_t *BoxDim, - /// [in] Array containing traversal stride in each of the TensorRank - /// dimensions. - const uint32_t *ElementStrides, - /// [in] Type of interleaved layout the tensor addresses - ur_exp_tensor_map_interleave_flags_t Interleave, - /// [in] Bank swizzling pattern inside shared memory - ur_exp_tensor_map_swizzle_flags_t Swizzle, - /// [in] L2 promotion size. - ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, - /// [in] Indicates whether zero or special NaN constant will be used to - /// fill out-of-bounds elements. - ur_exp_tensor_map_oob_fill_flags_t OobFill, - /// [out] Handle of the tensor map object. - ur_exp_tensor_map_handle_t *hTensorMap) try { - ur_result_t result = UR_RESULT_SUCCESS; - - ur_tensor_map_encode_tiled_exp_params_t params = { - &hDevice, &TensorMapType, &TensorRank, &GlobalAddress, - &GlobalDim, &GlobalStrides, &BoxDim, &ElementStrides, - &Interleave, &Swizzle, &L2Promotion, &OobFill, - &hTensorMap}; - - auto beforeCallback = reinterpret_cast( - mock::getCallbacks().get_before_callback("urTensorMapEncodeTiledExp")); - if (beforeCallback) { - result = beforeCallback(¶ms); - if (result != UR_RESULT_SUCCESS) { - return result; - } - } - - auto replaceCallback = reinterpret_cast( - mock::getCallbacks().get_replace_callback("urTensorMapEncodeTiledExp")); - if (replaceCallback) { - result = replaceCallback(¶ms); - } else { - - *hTensorMap = mock::createDummyHandle(); - result = UR_RESULT_SUCCESS; - } - - if (result != UR_RESULT_SUCCESS) { - return result; - } - - auto afterCallback = reinterpret_cast( - mock::getCallbacks().get_after_callback("urTensorMapEncodeTiledExp")); - if (afterCallback) { - return afterCallback(¶ms); - } - - return result; -} catch (...) { - return exceptionToResult(std::current_exception()); -} - } // namespace driver #if defined(__cplusplus) @@ -11965,36 +11796,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( return exceptionToResult(std::current_exception()); } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Exported function for filling application's TensorMapExp table -/// with current process' addresses -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION -UR_DLLEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( - /// [in] API version requested - ur_api_version_t version, - /// [in,out] pointer to table of DDI function pointers - ur_tensor_map_exp_dditable_t *pDdiTable) try { - if (nullptr == pDdiTable) - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - - if (driver::d_context.version < version) - return UR_RESULT_ERROR_UNSUPPORTED_VERSION; - - ur_result_t result = UR_RESULT_SUCCESS; - - pDdiTable->pfnEncodeIm2ColExp = driver::urTensorMapEncodeIm2ColExp; - - pDdiTable->pfnEncodeTiledExp = driver::urTensorMapEncodeTiledExp; - - return result; -} catch (...) { - return exceptionToResult(std::current_exception()); -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's USM table /// with current process' addresses diff --git a/source/adapters/native_cpu/CMakeLists.txt b/source/adapters/native_cpu/CMakeLists.txt index 983f44caef..17467bfdef 100644 --- a/source/adapters/native_cpu/CMakeLists.txt +++ b/source/adapters/native_cpu/CMakeLists.txt @@ -35,7 +35,6 @@ add_ur_adapter(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/queue.hpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/tensor_map.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ur_interface_loader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp ${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp diff --git a/source/adapters/native_cpu/tensor_map.cpp b/source/adapters/native_cpu/tensor_map.cpp deleted file mode 100644 index eb9f01b318..0000000000 --- a/source/adapters/native_cpu/tensor_map.cpp +++ /dev/null @@ -1,28 +0,0 @@ -//===--------- tensor_map.cpp - Native CPU Adapter ------------------------===// -// -// Copyright (C) 2024 Intel Corporation -// -// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM -// Exceptions. See LICENSE.TXT -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( - ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, - const uint64_t *, const uint64_t *, const int *, const int *, uint32_t, - uint32_t, const uint32_t *, ur_exp_tensor_map_interleave_flags_t, - ur_exp_tensor_map_swizzle_flags_t, ur_exp_tensor_map_l2_promotion_flags_t, - ur_exp_tensor_map_oob_fill_flags_t, ur_exp_tensor_map_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} -UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp( - ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, - const uint64_t *, const uint64_t *, const uint32_t *, const uint32_t *, - ur_exp_tensor_map_interleave_flags_t, ur_exp_tensor_map_swizzle_flags_t, - ur_exp_tensor_map_l2_promotion_flags_t, ur_exp_tensor_map_oob_fill_flags_t, - ur_exp_tensor_map_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} diff --git a/source/adapters/native_cpu/ur_interface_loader.cpp b/source/adapters/native_cpu/ur_interface_loader.cpp index 6683ad8d8b..3fe3863bcd 100644 --- a/source/adapters/native_cpu/ur_interface_loader.cpp +++ b/source/adapters/native_cpu/ur_interface_loader.cpp @@ -420,19 +420,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( - ur_api_version_t version, ur_tensor_map_exp_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - - pDdiTable->pfnEncodeIm2ColExp = urTensorMapEncodeIm2ColExp; - pDdiTable->pfnEncodeTiledExp = urTensorMapEncodeTiledExp; - - return result; -} - UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); diff --git a/source/adapters/opencl/CMakeLists.txt b/source/adapters/opencl/CMakeLists.txt index e091012bab..a7e91f75e5 100644 --- a/source/adapters/opencl/CMakeLists.txt +++ b/source/adapters/opencl/CMakeLists.txt @@ -38,7 +38,6 @@ add_ur_adapter(${TARGET_NAME} SHARED ${CMAKE_CURRENT_SOURCE_DIR}/queue.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sampler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm.hpp - ${CMAKE_CURRENT_SOURCE_DIR}/tensor_map.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/usm_p2p.cpp ${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp diff --git a/source/adapters/opencl/adapter.cpp b/source/adapters/opencl/adapter.cpp index 162bc59b6a..365723f14e 100644 --- a/source/adapters/opencl/adapter.cpp +++ b/source/adapters/opencl/adapter.cpp @@ -20,6 +20,7 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() { #ifdef _MSC_VER + // Loading OpenCL.dll increments the libraries internal reference count. auto handle = LoadLibraryA("OpenCL.dll"); @@ -30,17 +31,17 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() { // So we can safely decrement it here wihtout actually unloading OpenCL.dll. FreeLibrary(handle); -#else - // Loading libOpenCL.so to get the library handle but don't dlclose it as - // this causes a segfault when attempting to call any OpenCL entry point. - auto handle = dlopen("libOpenCL.so", RTLD_LOCAL); +#else // _MSC_VER + + // Use the default shared object search order (RTLD_DEFAULT) since the + // OpenCL-ICD-Loader has already been loaded into the process. #define CL_CORE_FUNCTION(FUNC) \ - FUNC = reinterpret_cast(dlsym(handle, #FUNC)); + FUNC = reinterpret_cast(dlsym(RTLD_DEFAULT, #FUNC)); #include "core_functions.def" #undef CL_CORE_FUNCTION -#endif +#endif // _MSC_VER } static ur_adapter_handle_t adapter = nullptr; diff --git a/source/adapters/opencl/tensor_map.cpp b/source/adapters/opencl/tensor_map.cpp deleted file mode 100644 index ea2a009f88..0000000000 --- a/source/adapters/opencl/tensor_map.cpp +++ /dev/null @@ -1,28 +0,0 @@ -//===--------- tensor_map.cpp - OpenCL Adapter ----------------------------===// -// -// Copyright (C) 2024 Intel Corporation -// -// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM -// Exceptions. See LICENSE.TXT -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include - -UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( - ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, - const uint64_t *, const uint64_t *, const int *, const int *, uint32_t, - uint32_t, const uint32_t *, ur_exp_tensor_map_interleave_flags_t, - ur_exp_tensor_map_swizzle_flags_t, ur_exp_tensor_map_l2_promotion_flags_t, - ur_exp_tensor_map_oob_fill_flags_t, ur_exp_tensor_map_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} -UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp( - ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *, - const uint64_t *, const uint64_t *, const uint32_t *, const uint32_t *, - ur_exp_tensor_map_interleave_flags_t, ur_exp_tensor_map_swizzle_flags_t, - ur_exp_tensor_map_l2_promotion_flags_t, ur_exp_tensor_map_oob_fill_flags_t, - ur_exp_tensor_map_handle_t *) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index 0ff80aed1e..a80c5bcca1 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -428,19 +428,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelExpProcAddrTable( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( - ur_api_version_t version, ur_tensor_map_exp_dditable_t *pDdiTable) { - auto result = validateProcInputs(version, pDdiTable); - if (UR_RESULT_SUCCESS != result) { - return result; - } - - pDdiTable->pfnEncodeIm2ColExp = urTensorMapEncodeIm2ColExp; - pDdiTable->pfnEncodeTiledExp = urTensorMapEncodeTiledExp; - - return result; -} - UR_DLLEXPORT ur_result_t UR_APICALL urGetProgramExpProcAddrTable( ur_api_version_t version, ur_program_exp_dditable_t *pDdiTable) { auto result = validateProcInputs(version, pDdiTable); diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index cb3067d2d1..561b12e983 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -9202,168 +9202,6 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( return result; } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urTensorMapEncodeIm2ColExp -__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( - /// [in] Handle of the device object. - ur_device_handle_t hDevice, - /// [in] Data type of the tensor object. - ur_exp_tensor_map_data_type_flags_t TensorMapType, - /// [in] Dimensionality of tensor; must be at least 3. - uint32_t TensorRank, - /// [in] Starting address of memory region described by tensor. - void *GlobalAddress, - /// [in] Array containing tensor size (number of elements) along each of - /// the TensorRank dimensions. - const uint64_t *GlobalDim, - /// [in] Array containing stride size (in bytes) along each of the - /// TensorRank - 1 dimensions. - const uint64_t *GlobalStrides, - /// [in] Array containing DHW dimensions of lower box corner. - const int *PixelBoxLowerCorner, - /// [in] Array containing DHW dimensions of upper box corner. - const int *PixelBoxUpperCorner, - /// [in] Number of channels per pixel. - uint32_t ChannelsPerPixel, - /// [in] Number of pixels per column. - uint32_t PixelsPerColumn, - /// [in] Array containing traversal stride in each of the TensorRank - /// dimensions. - const uint32_t *ElementStrides, - /// [in] Type of interleaved layout the tensor addresses - ur_exp_tensor_map_interleave_flags_t Interleave, - /// [in] Bank swizzling pattern inside shared memory - ur_exp_tensor_map_swizzle_flags_t Swizzle, - /// [in] L2 promotion size. - ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, - /// [in] Indicates whether zero or special NaN constant will be used to - /// fill out-of-bounds elements. - ur_exp_tensor_map_oob_fill_flags_t OobFill, - /// [out] Handle of the tensor map object. - ur_exp_tensor_map_handle_t *hTensorMap) { - auto pfnEncodeIm2ColExp = - getContext()->urDdiTable.TensorMapExp.pfnEncodeIm2ColExp; - - if (nullptr == pfnEncodeIm2ColExp) - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - - ur_tensor_map_encode_im_2_col_exp_params_t params = {&hDevice, - &TensorMapType, - &TensorRank, - &GlobalAddress, - &GlobalDim, - &GlobalStrides, - &PixelBoxLowerCorner, - &PixelBoxUpperCorner, - &ChannelsPerPixel, - &PixelsPerColumn, - &ElementStrides, - &Interleave, - &Swizzle, - &L2Promotion, - &OobFill, - &hTensorMap}; - uint64_t instance = - getContext()->notify_begin(UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP, - "urTensorMapEncodeIm2ColExp", ¶ms); - - auto &logger = getContext()->logger; - logger.info(" ---> urTensorMapEncodeIm2ColExp\n"); - - ur_result_t result = pfnEncodeIm2ColExp( - hDevice, TensorMapType, TensorRank, GlobalAddress, GlobalDim, - GlobalStrides, PixelBoxLowerCorner, PixelBoxUpperCorner, ChannelsPerPixel, - PixelsPerColumn, ElementStrides, Interleave, Swizzle, L2Promotion, - OobFill, hTensorMap); - - getContext()->notify_end(UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP, - "urTensorMapEncodeIm2ColExp", ¶ms, &result, - instance); - - if (logger.getLevel() <= logger::Level::INFO) { - std::ostringstream args_str; - ur::extras::printFunctionParams( - args_str, UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP, ¶ms); - logger.info(" <--- urTensorMapEncodeIm2ColExp({}) -> {};\n", - args_str.str(), result); - } - - return result; -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urTensorMapEncodeTiledExp -__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( - /// [in] Handle of the device object. - ur_device_handle_t hDevice, - /// [in] Data type of the tensor object. - ur_exp_tensor_map_data_type_flags_t TensorMapType, - /// [in] Dimensionality of tensor; must be at least 3. - uint32_t TensorRank, - /// [in] Starting address of memory region described by tensor. - void *GlobalAddress, - /// [in] Array containing tensor size (number of elements) along each of - /// the TensorRank dimensions. - const uint64_t *GlobalDim, - /// [in] Array containing stride size (in bytes) along each of the - /// TensorRank - 1 dimensions. - const uint64_t *GlobalStrides, - /// [in] Array containing traversal box size (number of elments) along - /// each of the TensorRank dimensions. Specifies how many elements to be - /// traversed along each tensor dimension. - const uint32_t *BoxDim, - /// [in] Array containing traversal stride in each of the TensorRank - /// dimensions. - const uint32_t *ElementStrides, - /// [in] Type of interleaved layout the tensor addresses - ur_exp_tensor_map_interleave_flags_t Interleave, - /// [in] Bank swizzling pattern inside shared memory - ur_exp_tensor_map_swizzle_flags_t Swizzle, - /// [in] L2 promotion size. - ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, - /// [in] Indicates whether zero or special NaN constant will be used to - /// fill out-of-bounds elements. - ur_exp_tensor_map_oob_fill_flags_t OobFill, - /// [out] Handle of the tensor map object. - ur_exp_tensor_map_handle_t *hTensorMap) { - auto pfnEncodeTiledExp = - getContext()->urDdiTable.TensorMapExp.pfnEncodeTiledExp; - - if (nullptr == pfnEncodeTiledExp) - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - - ur_tensor_map_encode_tiled_exp_params_t params = { - &hDevice, &TensorMapType, &TensorRank, &GlobalAddress, - &GlobalDim, &GlobalStrides, &BoxDim, &ElementStrides, - &Interleave, &Swizzle, &L2Promotion, &OobFill, - &hTensorMap}; - uint64_t instance = - getContext()->notify_begin(UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP, - "urTensorMapEncodeTiledExp", ¶ms); - - auto &logger = getContext()->logger; - logger.info(" ---> urTensorMapEncodeTiledExp\n"); - - ur_result_t result = - pfnEncodeTiledExp(hDevice, TensorMapType, TensorRank, GlobalAddress, - GlobalDim, GlobalStrides, BoxDim, ElementStrides, - Interleave, Swizzle, L2Promotion, OobFill, hTensorMap); - - getContext()->notify_end(UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP, - "urTensorMapEncodeTiledExp", ¶ms, &result, - instance); - - if (logger.getLevel() <= logger::Level::INFO) { - std::ostringstream args_str; - ur::extras::printFunctionParams( - args_str, UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP, ¶ms); - logger.info(" <--- urTensorMapEncodeTiledExp({}) -> {};\n", - args_str.str(), result); - } - - return result; -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Global table /// with current process' addresses @@ -10369,40 +10207,6 @@ __urdlllocal ur_result_t UR_APICALL urGetSamplerProcAddrTable( return result; } /////////////////////////////////////////////////////////////////////////////// -/// @brief Exported function for filling application's TensorMapExp table -/// with current process' addresses -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION -__urdlllocal ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( - /// [in] API version requested - ur_api_version_t version, - /// [in,out] pointer to table of DDI function pointers - ur_tensor_map_exp_dditable_t *pDdiTable) { - auto &dditable = ur_tracing_layer::getContext()->urDdiTable.TensorMapExp; - - if (nullptr == pDdiTable) - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - - if (UR_MAJOR_VERSION(ur_tracing_layer::getContext()->version) != - UR_MAJOR_VERSION(version) || - UR_MINOR_VERSION(ur_tracing_layer::getContext()->version) > - UR_MINOR_VERSION(version)) - return UR_RESULT_ERROR_UNSUPPORTED_VERSION; - - ur_result_t result = UR_RESULT_SUCCESS; - - dditable.pfnEncodeIm2ColExp = pDdiTable->pfnEncodeIm2ColExp; - pDdiTable->pfnEncodeIm2ColExp = ur_tracing_layer::urTensorMapEncodeIm2ColExp; - - dditable.pfnEncodeTiledExp = pDdiTable->pfnEncodeTiledExp; - pDdiTable->pfnEncodeTiledExp = ur_tracing_layer::urTensorMapEncodeTiledExp; - - return result; -} -/////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's USM table /// with current process' addresses /// @@ -10737,11 +10541,6 @@ ur_result_t context_t::init(ur_dditable_t *dditable, &dditable->Sampler); } - if (UR_RESULT_SUCCESS == result) { - result = ur_tracing_layer::urGetTensorMapExpProcAddrTable( - UR_API_VERSION_CURRENT, &dditable->TensorMapExp); - } - if (UR_RESULT_SUCCESS == result) { result = ur_tracing_layer::urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &dditable->USM); diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 19ab908ee3..dbfd282b8a 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -9816,206 +9816,6 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( return result; } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urTensorMapEncodeIm2ColExp -__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( - /// [in] Handle of the device object. - ur_device_handle_t hDevice, - /// [in] Data type of the tensor object. - ur_exp_tensor_map_data_type_flags_t TensorMapType, - /// [in] Dimensionality of tensor; must be at least 3. - uint32_t TensorRank, - /// [in] Starting address of memory region described by tensor. - void *GlobalAddress, - /// [in] Array containing tensor size (number of elements) along each of - /// the TensorRank dimensions. - const uint64_t *GlobalDim, - /// [in] Array containing stride size (in bytes) along each of the - /// TensorRank - 1 dimensions. - const uint64_t *GlobalStrides, - /// [in] Array containing DHW dimensions of lower box corner. - const int *PixelBoxLowerCorner, - /// [in] Array containing DHW dimensions of upper box corner. - const int *PixelBoxUpperCorner, - /// [in] Number of channels per pixel. - uint32_t ChannelsPerPixel, - /// [in] Number of pixels per column. - uint32_t PixelsPerColumn, - /// [in] Array containing traversal stride in each of the TensorRank - /// dimensions. - const uint32_t *ElementStrides, - /// [in] Type of interleaved layout the tensor addresses - ur_exp_tensor_map_interleave_flags_t Interleave, - /// [in] Bank swizzling pattern inside shared memory - ur_exp_tensor_map_swizzle_flags_t Swizzle, - /// [in] L2 promotion size. - ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, - /// [in] Indicates whether zero or special NaN constant will be used to - /// fill out-of-bounds elements. - ur_exp_tensor_map_oob_fill_flags_t OobFill, - /// [out] Handle of the tensor map object. - ur_exp_tensor_map_handle_t *hTensorMap) { - auto pfnEncodeIm2ColExp = - getContext()->urDdiTable.TensorMapExp.pfnEncodeIm2ColExp; - - if (nullptr == pfnEncodeIm2ColExp) { - return UR_RESULT_ERROR_UNINITIALIZED; - } - - if (getContext()->enableParameterValidation) { - if (NULL == hDevice) - return UR_RESULT_ERROR_INVALID_NULL_HANDLE; - - if (NULL == GlobalAddress) - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - - if (NULL == GlobalDim) - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - - if (NULL == GlobalStrides) - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - - if (NULL == PixelBoxLowerCorner) - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - - if (NULL == PixelBoxUpperCorner) - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - - if (NULL == ElementStrides) - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - - if (NULL == hTensorMap) - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - - if (UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType) - return UR_RESULT_ERROR_INVALID_ENUMERATION; - - if (UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave) - return UR_RESULT_ERROR_INVALID_ENUMERATION; - - if (UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle) - return UR_RESULT_ERROR_INVALID_ENUMERATION; - - if (UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion) - return UR_RESULT_ERROR_INVALID_ENUMERATION; - - if (UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill) - return UR_RESULT_ERROR_INVALID_ENUMERATION; - - if (TensorRank < 3) - return UR_RESULT_ERROR_INVALID_ARGUMENT; - } - - if (getContext()->enableLifetimeValidation && - !getContext()->refCountContext->isReferenceValid(hDevice)) { - getContext()->refCountContext->logInvalidReference(hDevice); - } - - ur_result_t result = pfnEncodeIm2ColExp( - hDevice, TensorMapType, TensorRank, GlobalAddress, GlobalDim, - GlobalStrides, PixelBoxLowerCorner, PixelBoxUpperCorner, ChannelsPerPixel, - PixelsPerColumn, ElementStrides, Interleave, Swizzle, L2Promotion, - OobFill, hTensorMap); - - return result; -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urTensorMapEncodeTiledExp -__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( - /// [in] Handle of the device object. - ur_device_handle_t hDevice, - /// [in] Data type of the tensor object. - ur_exp_tensor_map_data_type_flags_t TensorMapType, - /// [in] Dimensionality of tensor; must be at least 3. - uint32_t TensorRank, - /// [in] Starting address of memory region described by tensor. - void *GlobalAddress, - /// [in] Array containing tensor size (number of elements) along each of - /// the TensorRank dimensions. - const uint64_t *GlobalDim, - /// [in] Array containing stride size (in bytes) along each of the - /// TensorRank - 1 dimensions. - const uint64_t *GlobalStrides, - /// [in] Array containing traversal box size (number of elments) along - /// each of the TensorRank dimensions. Specifies how many elements to be - /// traversed along each tensor dimension. - const uint32_t *BoxDim, - /// [in] Array containing traversal stride in each of the TensorRank - /// dimensions. - const uint32_t *ElementStrides, - /// [in] Type of interleaved layout the tensor addresses - ur_exp_tensor_map_interleave_flags_t Interleave, - /// [in] Bank swizzling pattern inside shared memory - ur_exp_tensor_map_swizzle_flags_t Swizzle, - /// [in] L2 promotion size. - ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, - /// [in] Indicates whether zero or special NaN constant will be used to - /// fill out-of-bounds elements. - ur_exp_tensor_map_oob_fill_flags_t OobFill, - /// [out] Handle of the tensor map object. - ur_exp_tensor_map_handle_t *hTensorMap) { - auto pfnEncodeTiledExp = - getContext()->urDdiTable.TensorMapExp.pfnEncodeTiledExp; - - if (nullptr == pfnEncodeTiledExp) { - return UR_RESULT_ERROR_UNINITIALIZED; - } - - if (getContext()->enableParameterValidation) { - if (NULL == hDevice) - return UR_RESULT_ERROR_INVALID_NULL_HANDLE; - - if (NULL == GlobalAddress) - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - - if (NULL == GlobalDim) - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - - if (NULL == GlobalStrides) - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - - if (NULL == BoxDim) - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - - if (NULL == ElementStrides) - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - - if (NULL == hTensorMap) - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - - if (UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType) - return UR_RESULT_ERROR_INVALID_ENUMERATION; - - if (UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave) - return UR_RESULT_ERROR_INVALID_ENUMERATION; - - if (UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle) - return UR_RESULT_ERROR_INVALID_ENUMERATION; - - if (UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion) - return UR_RESULT_ERROR_INVALID_ENUMERATION; - - if (UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill) - return UR_RESULT_ERROR_INVALID_ENUMERATION; - - if (TensorRank < 3) - return UR_RESULT_ERROR_INVALID_ARGUMENT; - } - - if (getContext()->enableLifetimeValidation && - !getContext()->refCountContext->isReferenceValid(hDevice)) { - getContext()->refCountContext->logInvalidReference(hDevice); - } - - ur_result_t result = - pfnEncodeTiledExp(hDevice, TensorMapType, TensorRank, GlobalAddress, - GlobalDim, GlobalStrides, BoxDim, ElementStrides, - Interleave, Swizzle, L2Promotion, OobFill, hTensorMap); - - return result; -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Global table /// with current process' addresses @@ -11046,42 +10846,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( return result; } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Exported function for filling application's TensorMapExp table -/// with current process' addresses -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION -UR_DLLEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( - /// [in] API version requested - ur_api_version_t version, - /// [in,out] pointer to table of DDI function pointers - ur_tensor_map_exp_dditable_t *pDdiTable) { - auto &dditable = ur_validation_layer::getContext()->urDdiTable.TensorMapExp; - - if (nullptr == pDdiTable) - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - - if (UR_MAJOR_VERSION(ur_validation_layer::getContext()->version) != - UR_MAJOR_VERSION(version) || - UR_MINOR_VERSION(ur_validation_layer::getContext()->version) > - UR_MINOR_VERSION(version)) - return UR_RESULT_ERROR_UNSUPPORTED_VERSION; - - ur_result_t result = UR_RESULT_SUCCESS; - - dditable.pfnEncodeIm2ColExp = pDdiTable->pfnEncodeIm2ColExp; - pDdiTable->pfnEncodeIm2ColExp = - ur_validation_layer::urTensorMapEncodeIm2ColExp; - - dditable.pfnEncodeTiledExp = pDdiTable->pfnEncodeTiledExp; - pDdiTable->pfnEncodeTiledExp = ur_validation_layer::urTensorMapEncodeTiledExp; - - return result; -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's USM table /// with current process' addresses @@ -11438,11 +11202,6 @@ ur_result_t context_t::init(ur_dditable_t *dditable, UR_API_VERSION_CURRENT, &dditable->Sampler); } - if (UR_RESULT_SUCCESS == result) { - result = ur_validation_layer::urGetTensorMapExpProcAddrTable( - UR_API_VERSION_CURRENT, &dditable->TensorMapExp); - } - if (UR_RESULT_SUCCESS == result) { result = ur_validation_layer::urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &dditable->USM); diff --git a/source/loader/loader.def.in b/source/loader/loader.def.in index a3a18a4170..e7b6b5993d 100644 --- a/source/loader/loader.def.in +++ b/source/loader/loader.def.in @@ -119,7 +119,6 @@ EXPORTS urGetProgramProcAddrTable urGetQueueProcAddrTable urGetSamplerProcAddrTable - urGetTensorMapExpProcAddrTable urGetUSMExpProcAddrTable urGetUSMProcAddrTable urGetUsmP2PExpProcAddrTable @@ -333,11 +332,6 @@ EXPORTS urPrintExpSamplerCubemapFilterMode urPrintExpSamplerCubemapProperties urPrintExpSamplerMipProperties - urPrintExpTensorMapDataTypeFlags - urPrintExpTensorMapInterleaveFlags - urPrintExpTensorMapL2PromotionFlags - urPrintExpTensorMapOobFillFlags - urPrintExpTensorMapSwizzleFlags urPrintExpWin32Handle urPrintFunction urPrintFunctionParams @@ -474,8 +468,6 @@ EXPORTS urPrintSamplerRetainParams urPrintSpecializationConstantInfo urPrintStructureType - urPrintTensorMapEncodeIm_2ColExpParams - urPrintTensorMapEncodeTiledExpParams urPrintUsmAdviceFlags urPrintUsmAllocInfo urPrintUsmAllocLocationDesc @@ -546,8 +538,6 @@ EXPORTS urSamplerGetNativeHandle urSamplerRelease urSamplerRetain - urTensorMapEncodeIm2ColExp - urTensorMapEncodeTiledExp urUSMDeviceAlloc urUSMFree urUSMGetMemAllocInfo diff --git a/source/loader/loader.map.in b/source/loader/loader.map.in index 00a6de8c10..e9017e01cf 100644 --- a/source/loader/loader.map.in +++ b/source/loader/loader.map.in @@ -119,7 +119,6 @@ urGetProgramProcAddrTable; urGetQueueProcAddrTable; urGetSamplerProcAddrTable; - urGetTensorMapExpProcAddrTable; urGetUSMExpProcAddrTable; urGetUSMProcAddrTable; urGetUsmP2PExpProcAddrTable; @@ -333,11 +332,6 @@ urPrintExpSamplerCubemapFilterMode; urPrintExpSamplerCubemapProperties; urPrintExpSamplerMipProperties; - urPrintExpTensorMapDataTypeFlags; - urPrintExpTensorMapInterleaveFlags; - urPrintExpTensorMapL2PromotionFlags; - urPrintExpTensorMapOobFillFlags; - urPrintExpTensorMapSwizzleFlags; urPrintExpWin32Handle; urPrintFunction; urPrintFunctionParams; @@ -474,8 +468,6 @@ urPrintSamplerRetainParams; urPrintSpecializationConstantInfo; urPrintStructureType; - urPrintTensorMapEncodeIm_2ColExpParams; - urPrintTensorMapEncodeTiledExpParams; urPrintUsmAdviceFlags; urPrintUsmAllocInfo; urPrintUsmAllocLocationDesc; @@ -546,8 +538,6 @@ urSamplerGetNativeHandle; urSamplerRelease; urSamplerRetain; - urTensorMapEncodeIm2ColExp; - urTensorMapEncodeTiledExp; urUSMDeviceAlloc; urUSMFree; urUSMGetMemAllocInfo; diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 81360c7126..450f5f9364 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -9363,149 +9363,6 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( return result; } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urTensorMapEncodeIm2ColExp -__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( - /// [in] Handle of the device object. - ur_device_handle_t hDevice, - /// [in] Data type of the tensor object. - ur_exp_tensor_map_data_type_flags_t TensorMapType, - /// [in] Dimensionality of tensor; must be at least 3. - uint32_t TensorRank, - /// [in] Starting address of memory region described by tensor. - void *GlobalAddress, - /// [in] Array containing tensor size (number of elements) along each of - /// the TensorRank dimensions. - const uint64_t *GlobalDim, - /// [in] Array containing stride size (in bytes) along each of the - /// TensorRank - 1 dimensions. - const uint64_t *GlobalStrides, - /// [in] Array containing DHW dimensions of lower box corner. - const int *PixelBoxLowerCorner, - /// [in] Array containing DHW dimensions of upper box corner. - const int *PixelBoxUpperCorner, - /// [in] Number of channels per pixel. - uint32_t ChannelsPerPixel, - /// [in] Number of pixels per column. - uint32_t PixelsPerColumn, - /// [in] Array containing traversal stride in each of the TensorRank - /// dimensions. - const uint32_t *ElementStrides, - /// [in] Type of interleaved layout the tensor addresses - ur_exp_tensor_map_interleave_flags_t Interleave, - /// [in] Bank swizzling pattern inside shared memory - ur_exp_tensor_map_swizzle_flags_t Swizzle, - /// [in] L2 promotion size. - ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, - /// [in] Indicates whether zero or special NaN constant will be used to - /// fill out-of-bounds elements. - ur_exp_tensor_map_oob_fill_flags_t OobFill, - /// [out] Handle of the tensor map object. - ur_exp_tensor_map_handle_t *hTensorMap) { - ur_result_t result = UR_RESULT_SUCCESS; - - [[maybe_unused]] auto context = getContext(); - - // extract platform's function pointer table - auto dditable = reinterpret_cast(hDevice)->dditable; - auto pfnEncodeIm2ColExp = dditable->ur.TensorMapExp.pfnEncodeIm2ColExp; - if (nullptr == pfnEncodeIm2ColExp) - return UR_RESULT_ERROR_UNINITIALIZED; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // forward to device-platform - result = pfnEncodeIm2ColExp(hDevice, TensorMapType, TensorRank, GlobalAddress, - GlobalDim, GlobalStrides, PixelBoxLowerCorner, - PixelBoxUpperCorner, ChannelsPerPixel, - PixelsPerColumn, ElementStrides, Interleave, - Swizzle, L2Promotion, OobFill, hTensorMap); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *hTensorMap = reinterpret_cast( - context->factories.ur_exp_tensor_map_factory.getInstance(*hTensorMap, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Intercept function for urTensorMapEncodeTiledExp -__urdlllocal ur_result_t UR_APICALL urTensorMapEncodeTiledExp( - /// [in] Handle of the device object. - ur_device_handle_t hDevice, - /// [in] Data type of the tensor object. - ur_exp_tensor_map_data_type_flags_t TensorMapType, - /// [in] Dimensionality of tensor; must be at least 3. - uint32_t TensorRank, - /// [in] Starting address of memory region described by tensor. - void *GlobalAddress, - /// [in] Array containing tensor size (number of elements) along each of - /// the TensorRank dimensions. - const uint64_t *GlobalDim, - /// [in] Array containing stride size (in bytes) along each of the - /// TensorRank - 1 dimensions. - const uint64_t *GlobalStrides, - /// [in] Array containing traversal box size (number of elments) along - /// each of the TensorRank dimensions. Specifies how many elements to be - /// traversed along each tensor dimension. - const uint32_t *BoxDim, - /// [in] Array containing traversal stride in each of the TensorRank - /// dimensions. - const uint32_t *ElementStrides, - /// [in] Type of interleaved layout the tensor addresses - ur_exp_tensor_map_interleave_flags_t Interleave, - /// [in] Bank swizzling pattern inside shared memory - ur_exp_tensor_map_swizzle_flags_t Swizzle, - /// [in] L2 promotion size. - ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, - /// [in] Indicates whether zero or special NaN constant will be used to - /// fill out-of-bounds elements. - ur_exp_tensor_map_oob_fill_flags_t OobFill, - /// [out] Handle of the tensor map object. - ur_exp_tensor_map_handle_t *hTensorMap) { - ur_result_t result = UR_RESULT_SUCCESS; - - [[maybe_unused]] auto context = getContext(); - - // extract platform's function pointer table - auto dditable = reinterpret_cast(hDevice)->dditable; - auto pfnEncodeTiledExp = dditable->ur.TensorMapExp.pfnEncodeTiledExp; - if (nullptr == pfnEncodeTiledExp) - return UR_RESULT_ERROR_UNINITIALIZED; - - // convert loader handle to platform handle - hDevice = reinterpret_cast(hDevice)->handle; - - // forward to device-platform - result = - pfnEncodeTiledExp(hDevice, TensorMapType, TensorRank, GlobalAddress, - GlobalDim, GlobalStrides, BoxDim, ElementStrides, - Interleave, Swizzle, L2Promotion, OobFill, hTensorMap); - - if (UR_RESULT_SUCCESS != result) - return result; - - try { - // convert platform handle to loader handle - *hTensorMap = reinterpret_cast( - context->factories.ur_exp_tensor_map_factory.getInstance(*hTensorMap, - dditable)); - } catch (std::bad_alloc &) { - result = UR_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return result; -} - } // namespace ur_loader #if defined(__cplusplus) @@ -10559,60 +10416,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetSamplerProcAddrTable( return result; } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Exported function for filling application's TensorMapExp table -/// with current process' addresses -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_UNINITIALIZED -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION -UR_DLLEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable( - /// [in] API version requested - ur_api_version_t version, - /// [in,out] pointer to table of DDI function pointers - ur_tensor_map_exp_dditable_t *pDdiTable) { - if (nullptr == pDdiTable) - return UR_RESULT_ERROR_INVALID_NULL_POINTER; - - if (ur_loader::getContext()->version < version) - return UR_RESULT_ERROR_UNSUPPORTED_VERSION; - - ur_result_t result = UR_RESULT_SUCCESS; - - // Load the device-platform DDI tables - for (auto &platform : ur_loader::getContext()->platforms) { - // statically linked adapter inside of the loader - if (platform.handle == nullptr) - continue; - - if (platform.initStatus != UR_RESULT_SUCCESS) - continue; - auto getTable = reinterpret_cast( - ur_loader::LibLoader::getFunctionPtr(platform.handle.get(), - "urGetTensorMapExpProcAddrTable")); - if (!getTable) - continue; - platform.initStatus = getTable(version, &platform.dditable.ur.TensorMapExp); - } - - if (UR_RESULT_SUCCESS == result) { - if (ur_loader::getContext()->platforms.size() != 1 || - ur_loader::getContext()->forceIntercept) { - // return pointers to loader's DDIs - pDdiTable->pfnEncodeIm2ColExp = ur_loader::urTensorMapEncodeIm2ColExp; - pDdiTable->pfnEncodeTiledExp = ur_loader::urTensorMapEncodeTiledExp; - } else { - // return pointers directly to platform's DDIs - *pDdiTable = - ur_loader::getContext()->platforms.front().dditable.ur.TensorMapExp; - } - } - - return result; -} - /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's USM table /// with current process' addresses diff --git a/source/loader/ur_ldrddi.hpp b/source/loader/ur_ldrddi.hpp index e1a5e69c42..1ca7d18bc2 100644 --- a/source/loader/ur_ldrddi.hpp +++ b/source/loader/ur_ldrddi.hpp @@ -89,10 +89,6 @@ using ur_exp_command_buffer_command_factory_t = singleton_factory_t; -using ur_exp_tensor_map_object_t = object_t; -using ur_exp_tensor_map_factory_t = - singleton_factory_t; - struct handle_factories { ur_adapter_factory_t ur_adapter_factory; ur_platform_factory_t ur_platform_factory; @@ -110,7 +106,6 @@ struct handle_factories { ur_exp_external_semaphore_factory_t ur_exp_external_semaphore_factory; ur_exp_command_buffer_factory_t ur_exp_command_buffer_factory; ur_exp_command_buffer_command_factory_t ur_exp_command_buffer_command_factory; - ur_exp_tensor_map_factory_t ur_exp_tensor_map_factory; }; } // namespace ur_loader diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 3ca3e3a56f..031b4f3989 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -9830,158 +9830,4 @@ ur_result_t UR_APICALL urEnqueueNativeCommandExp( return exceptionToResult(std::current_exception()); } -/////////////////////////////////////////////////////////////////////////////// -/// @brief Encode tensor map with image data -/// -/// @details -/// - Map encode using im2col. -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_UNINITIALIZED -/// - ::UR_RESULT_ERROR_DEVICE_LOST -/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC -/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE -/// + `NULL == hDevice` -/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType` -/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave` -/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle` -/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion` -/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill` -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == GlobalAddress` -/// + `NULL == GlobalDim` -/// + `NULL == GlobalStrides` -/// + `NULL == PixelBoxLowerCorner` -/// + `NULL == PixelBoxUpperCorner` -/// + `NULL == ElementStrides` -/// + `NULL == hTensorMap` -/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT -/// + `TensorRank < 3` -ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( - /// [in] Handle of the device object. - ur_device_handle_t hDevice, - /// [in] Data type of the tensor object. - ur_exp_tensor_map_data_type_flags_t TensorMapType, - /// [in] Dimensionality of tensor; must be at least 3. - uint32_t TensorRank, - /// [in] Starting address of memory region described by tensor. - void *GlobalAddress, - /// [in] Array containing tensor size (number of elements) along each of - /// the TensorRank dimensions. - const uint64_t *GlobalDim, - /// [in] Array containing stride size (in bytes) along each of the - /// TensorRank - 1 dimensions. - const uint64_t *GlobalStrides, - /// [in] Array containing DHW dimensions of lower box corner. - const int *PixelBoxLowerCorner, - /// [in] Array containing DHW dimensions of upper box corner. - const int *PixelBoxUpperCorner, - /// [in] Number of channels per pixel. - uint32_t ChannelsPerPixel, - /// [in] Number of pixels per column. - uint32_t PixelsPerColumn, - /// [in] Array containing traversal stride in each of the TensorRank - /// dimensions. - const uint32_t *ElementStrides, - /// [in] Type of interleaved layout the tensor addresses - ur_exp_tensor_map_interleave_flags_t Interleave, - /// [in] Bank swizzling pattern inside shared memory - ur_exp_tensor_map_swizzle_flags_t Swizzle, - /// [in] L2 promotion size. - ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, - /// [in] Indicates whether zero or special NaN constant will be used to - /// fill out-of-bounds elements. - ur_exp_tensor_map_oob_fill_flags_t OobFill, - /// [out] Handle of the tensor map object. - ur_exp_tensor_map_handle_t *hTensorMap) try { - auto pfnEncodeIm2ColExp = - ur_lib::getContext()->urDdiTable.TensorMapExp.pfnEncodeIm2ColExp; - if (nullptr == pfnEncodeIm2ColExp) - return UR_RESULT_ERROR_UNINITIALIZED; - - return pfnEncodeIm2ColExp(hDevice, TensorMapType, TensorRank, GlobalAddress, - GlobalDim, GlobalStrides, PixelBoxLowerCorner, - PixelBoxUpperCorner, ChannelsPerPixel, - PixelsPerColumn, ElementStrides, Interleave, - Swizzle, L2Promotion, OobFill, hTensorMap); -} catch (...) { - return exceptionToResult(std::current_exception()); -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Encode tensor map with tiled data -/// -/// @details -/// - Tiled map encode. -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_UNINITIALIZED -/// - ::UR_RESULT_ERROR_DEVICE_LOST -/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC -/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE -/// + `NULL == hDevice` -/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType` -/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave` -/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle` -/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion` -/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill` -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == GlobalAddress` -/// + `NULL == GlobalDim` -/// + `NULL == GlobalStrides` -/// + `NULL == BoxDim` -/// + `NULL == ElementStrides` -/// + `NULL == hTensorMap` -/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT -/// + `TensorRank < 3` -ur_result_t UR_APICALL urTensorMapEncodeTiledExp( - /// [in] Handle of the device object. - ur_device_handle_t hDevice, - /// [in] Data type of the tensor object. - ur_exp_tensor_map_data_type_flags_t TensorMapType, - /// [in] Dimensionality of tensor; must be at least 3. - uint32_t TensorRank, - /// [in] Starting address of memory region described by tensor. - void *GlobalAddress, - /// [in] Array containing tensor size (number of elements) along each of - /// the TensorRank dimensions. - const uint64_t *GlobalDim, - /// [in] Array containing stride size (in bytes) along each of the - /// TensorRank - 1 dimensions. - const uint64_t *GlobalStrides, - /// [in] Array containing traversal box size (number of elments) along - /// each of the TensorRank dimensions. Specifies how many elements to be - /// traversed along each tensor dimension. - const uint32_t *BoxDim, - /// [in] Array containing traversal stride in each of the TensorRank - /// dimensions. - const uint32_t *ElementStrides, - /// [in] Type of interleaved layout the tensor addresses - ur_exp_tensor_map_interleave_flags_t Interleave, - /// [in] Bank swizzling pattern inside shared memory - ur_exp_tensor_map_swizzle_flags_t Swizzle, - /// [in] L2 promotion size. - ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, - /// [in] Indicates whether zero or special NaN constant will be used to - /// fill out-of-bounds elements. - ur_exp_tensor_map_oob_fill_flags_t OobFill, - /// [out] Handle of the tensor map object. - ur_exp_tensor_map_handle_t *hTensorMap) try { - auto pfnEncodeTiledExp = - ur_lib::getContext()->urDdiTable.TensorMapExp.pfnEncodeTiledExp; - if (nullptr == pfnEncodeTiledExp) - return UR_RESULT_ERROR_UNINITIALIZED; - - return pfnEncodeTiledExp(hDevice, TensorMapType, TensorRank, GlobalAddress, - GlobalDim, GlobalStrides, BoxDim, ElementStrides, - Interleave, Swizzle, L2Promotion, OobFill, - hTensorMap); -} catch (...) { - return exceptionToResult(std::current_exception()); -} - } // extern "C" diff --git a/source/loader/ur_libddi.cpp b/source/loader/ur_libddi.cpp index 9a5493c425..c3e9b613ae 100644 --- a/source/loader/ur_libddi.cpp +++ b/source/loader/ur_libddi.cpp @@ -99,11 +99,6 @@ __urdlllocal ur_result_t context_t::ddiInit() { urGetSamplerProcAddrTable(UR_API_VERSION_CURRENT, &urDdiTable.Sampler); } - if (UR_RESULT_SUCCESS == result) { - result = urGetTensorMapExpProcAddrTable(UR_API_VERSION_CURRENT, - &urDdiTable.TensorMapExp); - } - if (UR_RESULT_SUCCESS == result) { result = urGetUSMProcAddrTable(UR_API_VERSION_CURRENT, &urDdiTable.USM); } diff --git a/source/loader/ur_print.cpp b/source/loader/ur_print.cpp index 3fda2c2081..824a6a161a 100644 --- a/source/loader/ur_print.cpp +++ b/source/loader/ur_print.cpp @@ -1119,49 +1119,6 @@ ur_result_t urPrintExpEnqueueNativeCommandProperties( return str_copy(&ss, buffer, buff_size, out_size); } -ur_result_t -urPrintExpTensorMapDataTypeFlags(enum ur_exp_tensor_map_data_type_flag_t value, - char *buffer, const size_t buff_size, - size_t *out_size) { - std::stringstream ss; - ss << value; - return str_copy(&ss, buffer, buff_size, out_size); -} - -ur_result_t urPrintExpTensorMapInterleaveFlags( - enum ur_exp_tensor_map_interleave_flag_t value, char *buffer, - const size_t buff_size, size_t *out_size) { - std::stringstream ss; - ss << value; - return str_copy(&ss, buffer, buff_size, out_size); -} - -ur_result_t urPrintExpTensorMapL2PromotionFlags( - enum ur_exp_tensor_map_l2_promotion_flag_t value, char *buffer, - const size_t buff_size, size_t *out_size) { - std::stringstream ss; - ss << value; - return str_copy(&ss, buffer, buff_size, out_size); -} - -ur_result_t -urPrintExpTensorMapSwizzleFlags(enum ur_exp_tensor_map_swizzle_flag_t value, - char *buffer, const size_t buff_size, - size_t *out_size) { - std::stringstream ss; - ss << value; - return str_copy(&ss, buffer, buff_size, out_size); -} - -ur_result_t -urPrintExpTensorMapOobFillFlags(enum ur_exp_tensor_map_oob_fill_flag_t value, - char *buffer, const size_t buff_size, - size_t *out_size) { - std::stringstream ss; - ss << value; - return str_copy(&ss, buffer, buff_size, out_size); -} - ur_result_t urPrintAdapterGetParams(const struct ur_adapter_get_params_t *params, char *buffer, const size_t buff_size, @@ -2569,22 +2526,6 @@ ur_result_t urPrintSamplerCreateWithNativeHandleParams( return str_copy(&ss, buffer, buff_size, out_size); } -ur_result_t urPrintTensorMapEncodeIm_2ColExpParams( - const struct ur_tensor_map_encode_im_2_col_exp_params_t *params, - char *buffer, const size_t buff_size, size_t *out_size) { - std::stringstream ss; - ss << params; - return str_copy(&ss, buffer, buff_size, out_size); -} - -ur_result_t urPrintTensorMapEncodeTiledExpParams( - const struct ur_tensor_map_encode_tiled_exp_params_t *params, char *buffer, - const size_t buff_size, size_t *out_size) { - std::stringstream ss; - ss << params; - return str_copy(&ss, buffer, buff_size, out_size); -} - ur_result_t urPrintUsmHostAllocParams(const struct ur_usm_host_alloc_params_t *params, char *buffer, const size_t buff_size, diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 55ecf2abd9..b9d0c3c390 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -8581,138 +8581,3 @@ ur_result_t UR_APICALL urEnqueueNativeCommandExp( ur_result_t result = UR_RESULT_SUCCESS; return result; } - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Encode tensor map with image data -/// -/// @details -/// - Map encode using im2col. -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_UNINITIALIZED -/// - ::UR_RESULT_ERROR_DEVICE_LOST -/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC -/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE -/// + `NULL == hDevice` -/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType` -/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave` -/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle` -/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion` -/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill` -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == GlobalAddress` -/// + `NULL == GlobalDim` -/// + `NULL == GlobalStrides` -/// + `NULL == PixelBoxLowerCorner` -/// + `NULL == PixelBoxUpperCorner` -/// + `NULL == ElementStrides` -/// + `NULL == hTensorMap` -/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT -/// + `TensorRank < 3` -ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp( - /// [in] Handle of the device object. - ur_device_handle_t hDevice, - /// [in] Data type of the tensor object. - ur_exp_tensor_map_data_type_flags_t TensorMapType, - /// [in] Dimensionality of tensor; must be at least 3. - uint32_t TensorRank, - /// [in] Starting address of memory region described by tensor. - void *GlobalAddress, - /// [in] Array containing tensor size (number of elements) along each of - /// the TensorRank dimensions. - const uint64_t *GlobalDim, - /// [in] Array containing stride size (in bytes) along each of the - /// TensorRank - 1 dimensions. - const uint64_t *GlobalStrides, - /// [in] Array containing DHW dimensions of lower box corner. - const int *PixelBoxLowerCorner, - /// [in] Array containing DHW dimensions of upper box corner. - const int *PixelBoxUpperCorner, - /// [in] Number of channels per pixel. - uint32_t ChannelsPerPixel, - /// [in] Number of pixels per column. - uint32_t PixelsPerColumn, - /// [in] Array containing traversal stride in each of the TensorRank - /// dimensions. - const uint32_t *ElementStrides, - /// [in] Type of interleaved layout the tensor addresses - ur_exp_tensor_map_interleave_flags_t Interleave, - /// [in] Bank swizzling pattern inside shared memory - ur_exp_tensor_map_swizzle_flags_t Swizzle, - /// [in] L2 promotion size. - ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, - /// [in] Indicates whether zero or special NaN constant will be used to - /// fill out-of-bounds elements. - ur_exp_tensor_map_oob_fill_flags_t OobFill, - /// [out] Handle of the tensor map object. - ur_exp_tensor_map_handle_t *hTensorMap) { - ur_result_t result = UR_RESULT_SUCCESS; - return result; -} - -/////////////////////////////////////////////////////////////////////////////// -/// @brief Encode tensor map with tiled data -/// -/// @details -/// - Tiled map encode. -/// -/// @returns -/// - ::UR_RESULT_SUCCESS -/// - ::UR_RESULT_ERROR_UNINITIALIZED -/// - ::UR_RESULT_ERROR_DEVICE_LOST -/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC -/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE -/// + `NULL == hDevice` -/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType` -/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave` -/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle` -/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion` -/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill` -/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == GlobalAddress` -/// + `NULL == GlobalDim` -/// + `NULL == GlobalStrides` -/// + `NULL == BoxDim` -/// + `NULL == ElementStrides` -/// + `NULL == hTensorMap` -/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT -/// + `TensorRank < 3` -ur_result_t UR_APICALL urTensorMapEncodeTiledExp( - /// [in] Handle of the device object. - ur_device_handle_t hDevice, - /// [in] Data type of the tensor object. - ur_exp_tensor_map_data_type_flags_t TensorMapType, - /// [in] Dimensionality of tensor; must be at least 3. - uint32_t TensorRank, - /// [in] Starting address of memory region described by tensor. - void *GlobalAddress, - /// [in] Array containing tensor size (number of elements) along each of - /// the TensorRank dimensions. - const uint64_t *GlobalDim, - /// [in] Array containing stride size (in bytes) along each of the - /// TensorRank - 1 dimensions. - const uint64_t *GlobalStrides, - /// [in] Array containing traversal box size (number of elments) along - /// each of the TensorRank dimensions. Specifies how many elements to be - /// traversed along each tensor dimension. - const uint32_t *BoxDim, - /// [in] Array containing traversal stride in each of the TensorRank - /// dimensions. - const uint32_t *ElementStrides, - /// [in] Type of interleaved layout the tensor addresses - ur_exp_tensor_map_interleave_flags_t Interleave, - /// [in] Bank swizzling pattern inside shared memory - ur_exp_tensor_map_swizzle_flags_t Swizzle, - /// [in] L2 promotion size. - ur_exp_tensor_map_l2_promotion_flags_t L2Promotion, - /// [in] Indicates whether zero or special NaN constant will be used to - /// fill out-of-bounds elements. - ur_exp_tensor_map_oob_fill_flags_t OobFill, - /// [out] Handle of the tensor map object. - ur_exp_tensor_map_handle_t *hTensorMap) { - ur_result_t result = UR_RESULT_SUCCESS; - return result; -} diff --git a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp index b031ccb376..20f78c2d8c 100644 --- a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp +++ b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp @@ -41,8 +41,9 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchKernelWgSizeTest); // Note: Due to an issue with HIP, the subgroup test is not generated struct urEnqueueKernelLaunchKernelSubGroupTest : uur::urKernelExecutionTest { void SetUp() override { - UUR_KNOWN_FAILURE_ON(uur::CUDA{}, uur::HIP{}, uur::LevelZero{}, - uur::LevelZeroV2{}); + // Subgroup size of 8 isn't supported on the Data Center GPU Max + UUR_KNOWN_FAILURE_ON(uur::HIP{}, uur::LevelZero{"Data Center GPU Max"}, + uur::LevelZeroV2{"Data Center GPU Max"}); program_name = "subgroup"; UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp()); @@ -191,7 +192,7 @@ TEST_P(urEnqueueKernelLaunchKernelWgSizeTest, NonMatchingLocalSize) { } TEST_P(urEnqueueKernelLaunchKernelSubGroupTest, Success) { - UUR_KNOWN_FAILURE_ON(uur::LevelZero{}); + UUR_KNOWN_FAILURE_ON(uur::CUDA{}); ur_mem_handle_t buffer = nullptr; AddBuffer1DArg(sizeof(size_t), &buffer);