Skip to content

Commit

Permalink
Merge branch 'main' into add-command-buffer-support
Browse files Browse the repository at this point in the history
  • Loading branch information
lslusarczyk authored Jan 28, 2025
2 parents 0bc9f02 + 3a1b4c7 commit 311c7db
Show file tree
Hide file tree
Showing 40 changed files with 12 additions and 3,073 deletions.
299 changes: 0 additions & 299 deletions include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -433,10 +433,6 @@ typedef enum ur_function_t {
UR_FUNCTION_BINDLESS_IMAGES_MAP_EXTERNAL_LINEAR_MEMORY_EXP = 245,
/// Enumerator for ::urEnqueueEventsWaitWithBarrierExt
UR_FUNCTION_ENQUEUE_EVENTS_WAIT_WITH_BARRIER_EXT = 246,
/// Enumerator for ::urTensorMapEncodeIm2ColExp
UR_FUNCTION_TENSOR_MAP_ENCODE_IM_2_COL_EXP = 247,
/// Enumerator for ::urTensorMapEncodeTiledExp
UR_FUNCTION_TENSOR_MAP_ENCODE_TILED_EXP = 248,
/// Enumerator for ::urPhysicalMemGetInfo
UR_FUNCTION_PHYSICAL_MEM_GET_INFO = 249,
/// @cond
Expand Down Expand Up @@ -12133,258 +12129,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueNativeCommandExp(
/// array.
ur_event_handle_t *phEvent);

#if !defined(__GNUC__)
#pragma endregion
#endif
// Intel 'oneAPI' Unified Runtime Experimental API for mapping tensor objects
#if !defined(__GNUC__)
#pragma region tensor_map_(experimental)
#endif
///////////////////////////////////////////////////////////////////////////////
/// @brief Handle of tensor map object
///
/// @details
/// - Pointer to `struct ur_exp_tensor_map_handle_t_`; this typedef only
///   forward-declares that struct, and no definition of it is visible here.
/// - ::urTensorMapEncodeIm2ColExp and ::urTensorMapEncodeTiledExp take a
///   pointer to this handle type as their `hTensorMap` output parameter.
typedef struct ur_exp_tensor_map_handle_t_ *ur_exp_tensor_map_handle_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Tensor map data type
///
/// @details
/// - ::ur_exp_tensor_map_data_type_flags_t is the 32-bit integer type that
///   carries these values; it is the type of the `TensorMapType` parameter
///   of ::urTensorMapEncodeIm2ColExp and ::urTensorMapEncodeTiledExp.
/// - Each enumerator is a distinct ::UR_BIT position, 0 through 12. The
///   comment on each enumerator gives the byte size of the named type.
typedef uint32_t ur_exp_tensor_map_data_type_flags_t;
typedef enum ur_exp_tensor_map_data_type_flag_t {
/// 1 byte
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT8 = UR_BIT(0),
/// 2 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT16 = UR_BIT(1),
/// 4 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT32 = UR_BIT(2),
/// 4 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT32 = UR_BIT(3),
/// 8 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_UINT64 = UR_BIT(4),
/// 8 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_INT64 = UR_BIT(5),
/// 2 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT16 = UR_BIT(6),
/// 4 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32 = UR_BIT(7),
/// 8 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT64 = UR_BIT(8),
/// 2 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_BFLOAT16 = UR_BIT(9),
/// 4 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FLOAT32_FTZ = UR_BIT(10),
/// 4 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32 = UR_BIT(11),
/// 4 bytes
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_TFLOAT32_FTZ = UR_BIT(12),
// Not a data type: 0x7fffffff is not a single bit. Presumably present only to
// force the enum to a 32-bit representation (TODO confirm).
/// @cond
UR_EXP_TENSOR_MAP_DATA_TYPE_FLAG_FORCE_UINT32 = 0x7fffffff
/// @endcond

} ur_exp_tensor_map_data_type_flag_t;
/// @brief Bit Mask for validating ur_exp_tensor_map_data_type_flags_t
/// @details Has bits 13-31 set, i.e. every bit position not assigned to an
/// enumerator above. The encode functions list
/// ::UR_RESULT_ERROR_INVALID_ENUMERATION for a non-zero `mask & TensorMapType`.
#define UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK 0xffffe000

///////////////////////////////////////////////////////////////////////////////
/// @brief Tensor map interleave
///
/// @details
/// - ::ur_exp_tensor_map_interleave_flags_t is the 32-bit integer type that
///   carries these values; it is the type of the `Interleave` parameter of
///   ::urTensorMapEncodeIm2ColExp and ::urTensorMapEncodeTiledExp.
/// - Each enumerator is a distinct ::UR_BIT position, 0 through 2. Note that
///   "none" is itself a bit (UR_BIT(0)), not the value zero.
typedef uint32_t ur_exp_tensor_map_interleave_flags_t;
typedef enum ur_exp_tensor_map_interleave_flag_t {
/// No interleave
UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_NONE = UR_BIT(0),
/// 16B interleave
UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_16B = UR_BIT(1),
/// 32B interleave
UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_32B = UR_BIT(2),
// Not an interleave mode: 0x7fffffff is not a single bit. Presumably present
// only to force the enum to a 32-bit representation (TODO confirm).
/// @cond
UR_EXP_TENSOR_MAP_INTERLEAVE_FLAG_FORCE_UINT32 = 0x7fffffff
/// @endcond

} ur_exp_tensor_map_interleave_flag_t;
/// @brief Bit Mask for validating ur_exp_tensor_map_interleave_flags_t
/// @details Has bits 3-31 set, i.e. every bit position not assigned to an
/// enumerator above. The encode functions list
/// ::UR_RESULT_ERROR_INVALID_ENUMERATION for a non-zero `mask & Interleave`.
#define UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK 0xfffffff8

///////////////////////////////////////////////////////////////////////////////
/// @brief Tensor map l2 promotion
///
/// @details
/// - ::ur_exp_tensor_map_l2_promotion_flags_t is the 32-bit integer type that
///   carries these values; it is the type of the `L2Promotion` parameter of
///   ::urTensorMapEncodeIm2ColExp and ::urTensorMapEncodeTiledExp, which
///   describe it as the "L2 promotion size".
/// - Each enumerator is a distinct ::UR_BIT position, 0 through 3. Note that
///   "none" is itself a bit (UR_BIT(0)), not the value zero.
typedef uint32_t ur_exp_tensor_map_l2_promotion_flags_t;
typedef enum ur_exp_tensor_map_l2_promotion_flag_t {
/// No promotion type
UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_NONE = UR_BIT(0),
/// 64B promotion type
UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_64B = UR_BIT(1),
/// 128B promotion type
UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_128B = UR_BIT(2),
/// 256B promotion type
UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_256B = UR_BIT(3),
// Not a promotion type: 0x7fffffff is not a single bit. Presumably present
// only to force the enum to a 32-bit representation (TODO confirm).
/// @cond
UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAG_FORCE_UINT32 = 0x7fffffff
/// @endcond

} ur_exp_tensor_map_l2_promotion_flag_t;
/// @brief Bit Mask for validating ur_exp_tensor_map_l2_promotion_flags_t
/// @details Has bits 4-31 set, i.e. every bit position not assigned to an
/// enumerator above. The encode functions list
/// ::UR_RESULT_ERROR_INVALID_ENUMERATION for a non-zero `mask & L2Promotion`.
#define UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK 0xfffffff0

///////////////////////////////////////////////////////////////////////////////
/// @brief Tensor map swizzle
///
/// @details
/// - ::ur_exp_tensor_map_swizzle_flags_t is the 32-bit integer type that
///   carries these values; it is the type of the `Swizzle` parameter of
///   ::urTensorMapEncodeIm2ColExp and ::urTensorMapEncodeTiledExp, which
///   describe it as the "bank swizzling pattern inside shared memory".
/// - Each enumerator is a distinct ::UR_BIT position, 0 through 3. Note that
///   "none" is itself a bit (UR_BIT(0)), not the value zero.
typedef uint32_t ur_exp_tensor_map_swizzle_flags_t;
typedef enum ur_exp_tensor_map_swizzle_flag_t {
/// No swizzle
UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_NONE = UR_BIT(0),
/// 32B swizzle
UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_32B = UR_BIT(1),
/// 64B swizzle
UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_64B = UR_BIT(2),
/// 128B swizzle
UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_128B = UR_BIT(3),
// Not a swizzle mode: 0x7fffffff is not a single bit. Presumably present only
// to force the enum to a 32-bit representation (TODO confirm).
/// @cond
UR_EXP_TENSOR_MAP_SWIZZLE_FLAG_FORCE_UINT32 = 0x7fffffff
/// @endcond

} ur_exp_tensor_map_swizzle_flag_t;
/// @brief Bit Mask for validating ur_exp_tensor_map_swizzle_flags_t
/// @details Has bits 4-31 set, i.e. every bit position not assigned to an
/// enumerator above. The encode functions list
/// ::UR_RESULT_ERROR_INVALID_ENUMERATION for a non-zero `mask & Swizzle`.
#define UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK 0xfffffff0

///////////////////////////////////////////////////////////////////////////////
/// @brief Tensor map OOB fill
///
/// @details
/// - ::ur_exp_tensor_map_oob_fill_flags_t is the 32-bit integer type that
///   carries these values; it is the type of the `OobFill` parameter of
///   ::urTensorMapEncodeIm2ColExp and ::urTensorMapEncodeTiledExp, which
///   describe it as selecting whether zero or a special NaN constant is used
///   to fill out-of-bounds elements.
/// - Each enumerator is a distinct ::UR_BIT position, 0 through 1. Note that
///   "none" is itself a bit (UR_BIT(0)), not the value zero.
typedef uint32_t ur_exp_tensor_map_oob_fill_flags_t;
typedef enum ur_exp_tensor_map_oob_fill_flag_t {
/// No OOB fill
UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_NONE = UR_BIT(0),
/// The semantics of this fill mode are not described in this header; refer
/// to the NVIDIA documentation.
UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_REQUEST_ZERO_FMA = UR_BIT(1),
// Not a fill mode: 0x7fffffff is not a single bit. Presumably present only to
// force the enum to a 32-bit representation (TODO confirm).
/// @cond
UR_EXP_TENSOR_MAP_OOB_FILL_FLAG_FORCE_UINT32 = 0x7fffffff
/// @endcond

} ur_exp_tensor_map_oob_fill_flag_t;
/// @brief Bit Mask for validating ur_exp_tensor_map_oob_fill_flags_t
/// @details Has bits 2-31 set, i.e. every bit position not assigned to an
/// enumerator above. The encode functions list
/// ::UR_RESULT_ERROR_INVALID_ENUMERATION for a non-zero `mask & OobFill`.
#define UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK 0xfffffffc

///////////////////////////////////////////////////////////////////////////////
/// @brief Encode tensor map with image data
///
/// @details
/// - Map encode using im2col.
/// - The resulting tensor map handle is returned through the `hTensorMap`
///   output parameter.
///
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_UNINITIALIZED
/// - ::UR_RESULT_ERROR_DEVICE_LOST
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hDevice`
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType`
/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave`
/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle`
/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion`
/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill`
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
/// + `NULL == GlobalAddress`
/// + `NULL == GlobalDim`
/// + `NULL == GlobalStrides`
/// + `NULL == PixelBoxLowerCorner`
/// + `NULL == PixelBoxUpperCorner`
/// + `NULL == ElementStrides`
/// + `NULL == hTensorMap`
/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT
/// + `TensorRank < 3`
UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeIm2ColExp(
/// [in] Handle of the device object.
ur_device_handle_t hDevice,
// NOTE(review): the *_flags_t parameters below are bit-flag types; this
// header does not say whether more than one bit may be set in a single
// value. Confirm against the adapter implementation.
/// [in] Data type of the tensor object; see
/// ::ur_exp_tensor_map_data_type_flag_t.
ur_exp_tensor_map_data_type_flags_t TensorMapType,
/// [in] Dimensionality of tensor; must be at least 3.
uint32_t TensorRank,
/// [in] Starting address of memory region described by tensor.
void *GlobalAddress,
/// [in] Array containing tensor size (number of elements) along each of
/// the TensorRank dimensions.
const uint64_t *GlobalDim,
/// [in] Array containing stride size (in bytes) along each of the
/// TensorRank - 1 dimensions.
const uint64_t *GlobalStrides,
// NOTE(review): the required length of the two pixel-box arrays is not stated
// in this header. Confirm against the adapter implementation.
/// [in] Array containing DHW dimensions of lower box corner.
const int *PixelBoxLowerCorner,
/// [in] Array containing DHW dimensions of upper box corner.
const int *PixelBoxUpperCorner,
/// [in] Number of channels per pixel.
uint32_t ChannelsPerPixel,
/// [in] Number of pixels per column.
uint32_t PixelsPerColumn,
/// [in] Array containing traversal stride in each of the TensorRank
/// dimensions.
const uint32_t *ElementStrides,
/// [in] Type of interleaved layout the tensor addresses; see
/// ::ur_exp_tensor_map_interleave_flag_t.
ur_exp_tensor_map_interleave_flags_t Interleave,
/// [in] Bank swizzling pattern inside shared memory; see
/// ::ur_exp_tensor_map_swizzle_flag_t.
ur_exp_tensor_map_swizzle_flags_t Swizzle,
/// [in] L2 promotion size; see ::ur_exp_tensor_map_l2_promotion_flag_t.
ur_exp_tensor_map_l2_promotion_flags_t L2Promotion,
/// [in] Indicates whether zero or special NaN constant will be used to
/// fill out-of-bounds elements; see ::ur_exp_tensor_map_oob_fill_flag_t.
ur_exp_tensor_map_oob_fill_flags_t OobFill,
/// [out] Handle of the tensor map object. Must not be NULL (see @returns).
ur_exp_tensor_map_handle_t *hTensorMap);

///////////////////////////////////////////////////////////////////////////////
/// @brief Encode tensor map with tiled data
///
/// @details
/// - Tiled map encode.
/// - The resulting tensor map handle is returned through the `hTensorMap`
///   output parameter.
///
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_UNINITIALIZED
/// - ::UR_RESULT_ERROR_DEVICE_LOST
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hDevice`
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
/// + `::UR_EXP_TENSOR_MAP_DATA_TYPE_FLAGS_MASK & TensorMapType`
/// + `::UR_EXP_TENSOR_MAP_INTERLEAVE_FLAGS_MASK & Interleave`
/// + `::UR_EXP_TENSOR_MAP_SWIZZLE_FLAGS_MASK & Swizzle`
/// + `::UR_EXP_TENSOR_MAP_L2_PROMOTION_FLAGS_MASK & L2Promotion`
/// + `::UR_EXP_TENSOR_MAP_OOB_FILL_FLAGS_MASK & OobFill`
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
/// + `NULL == GlobalAddress`
/// + `NULL == GlobalDim`
/// + `NULL == GlobalStrides`
/// + `NULL == BoxDim`
/// + `NULL == ElementStrides`
/// + `NULL == hTensorMap`
/// - ::UR_RESULT_ERROR_INVALID_ARGUMENT
/// + `TensorRank < 3`
UR_APIEXPORT ur_result_t UR_APICALL urTensorMapEncodeTiledExp(
/// [in] Handle of the device object.
ur_device_handle_t hDevice,
// NOTE(review): the *_flags_t parameters below are bit-flag types; this
// header does not say whether more than one bit may be set in a single
// value. Confirm against the adapter implementation.
/// [in] Data type of the tensor object; see
/// ::ur_exp_tensor_map_data_type_flag_t.
ur_exp_tensor_map_data_type_flags_t TensorMapType,
// NOTE(review): the `TensorRank < 3` rule is identical to the im2col variant.
// If this maps onto CUDA's cuTensorMapEncodeTiled, that API appears to require
// rank >= 3 only for interleaved layouts. Confirm the stricter rule is
// intended.
/// [in] Dimensionality of tensor; must be at least 3.
uint32_t TensorRank,
/// [in] Starting address of memory region described by tensor.
void *GlobalAddress,
/// [in] Array containing tensor size (number of elements) along each of
/// the TensorRank dimensions.
const uint64_t *GlobalDim,
/// [in] Array containing stride size (in bytes) along each of the
/// TensorRank - 1 dimensions.
const uint64_t *GlobalStrides,
/// [in] Array containing traversal box size (number of elements) along
/// each of the TensorRank dimensions. Specifies how many elements are
/// traversed along each tensor dimension.
const uint32_t *BoxDim,
/// [in] Array containing traversal stride in each of the TensorRank
/// dimensions.
const uint32_t *ElementStrides,
/// [in] Type of interleaved layout the tensor addresses; see
/// ::ur_exp_tensor_map_interleave_flag_t.
ur_exp_tensor_map_interleave_flags_t Interleave,
/// [in] Bank swizzling pattern inside shared memory; see
/// ::ur_exp_tensor_map_swizzle_flag_t.
ur_exp_tensor_map_swizzle_flags_t Swizzle,
/// [in] L2 promotion size; see ::ur_exp_tensor_map_l2_promotion_flag_t.
ur_exp_tensor_map_l2_promotion_flags_t L2Promotion,
/// [in] Indicates whether zero or special NaN constant will be used to
/// fill out-of-bounds elements; see ::ur_exp_tensor_map_oob_fill_flag_t.
ur_exp_tensor_map_oob_fill_flags_t OobFill,
/// [out] Handle of the tensor map object. Must not be NULL (see @returns).
ur_exp_tensor_map_handle_t *hTensorMap);

#if !defined(__GNUC__)
#pragma endregion
#endif
Expand Down Expand Up @@ -14574,49 +14318,6 @@ typedef struct ur_command_buffer_command_get_info_exp_params_t {
size_t **ppPropSizeRet;
} ur_command_buffer_command_get_info_exp_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urTensorMapEncodeIm2ColExp
/// @details Each entry is a pointer to the parameter passed to the function;
/// allowing the callback the ability to modify the parameter's value.
/// Fields appear in the same order as the parameters of
/// ::urTensorMapEncodeIm2ColExp, and each field's type is the corresponding
/// parameter type with one extra level of indirection.
typedef struct ur_tensor_map_encode_im_2_col_exp_params_t {
ur_device_handle_t *phDevice;
ur_exp_tensor_map_data_type_flags_t *pTensorMapType;
uint32_t *pTensorRank;
void **pGlobalAddress;
const uint64_t **pGlobalDim;
const uint64_t **pGlobalStrides;
const int **pPixelBoxLowerCorner;
const int **pPixelBoxUpperCorner;
uint32_t *pChannelsPerPixel;
uint32_t *pPixelsPerColumn;
const uint32_t **pElementStrides;
ur_exp_tensor_map_interleave_flags_t *pInterleave;
ur_exp_tensor_map_swizzle_flags_t *pSwizzle;
ur_exp_tensor_map_l2_promotion_flags_t *pL2Promotion;
ur_exp_tensor_map_oob_fill_flags_t *pOobFill;
// Pointer to the function's `hTensorMap` argument, which is itself the
// [out] pointer to the handle -- hence the double indirection.
ur_exp_tensor_map_handle_t **phTensorMap;
} ur_tensor_map_encode_im_2_col_exp_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urTensorMapEncodeTiledExp
/// @details Each entry is a pointer to the parameter passed to the function;
/// allowing the callback the ability to modify the parameter's value.
/// Fields appear in the same order as the parameters of
/// ::urTensorMapEncodeTiledExp, and each field's type is the corresponding
/// parameter type with one extra level of indirection.
typedef struct ur_tensor_map_encode_tiled_exp_params_t {
ur_device_handle_t *phDevice;
ur_exp_tensor_map_data_type_flags_t *pTensorMapType;
uint32_t *pTensorRank;
void **pGlobalAddress;
const uint64_t **pGlobalDim;
const uint64_t **pGlobalStrides;
const uint32_t **pBoxDim;
const uint32_t **pElementStrides;
ur_exp_tensor_map_interleave_flags_t *pInterleave;
ur_exp_tensor_map_swizzle_flags_t *pSwizzle;
ur_exp_tensor_map_l2_promotion_flags_t *pL2Promotion;
ur_exp_tensor_map_oob_fill_flags_t *pOobFill;
// Pointer to the function's `hTensorMap` argument, which is itself the
// [out] pointer to the handle -- hence the double indirection.
ur_exp_tensor_map_handle_t **phTensorMap;
} ur_tensor_map_encode_tiled_exp_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urUsmP2PEnablePeerAccessExp
/// @details Each entry is a pointer to the parameter passed to the function;
Expand Down
2 changes: 0 additions & 2 deletions include/ur_api_funcs.def
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,6 @@ _UR_API(urCommandBufferUpdateSignalEventExp)
_UR_API(urCommandBufferUpdateWaitEventsExp)
_UR_API(urCommandBufferGetInfoExp)
_UR_API(urCommandBufferCommandGetInfoExp)
_UR_API(urTensorMapEncodeIm2ColExp)
_UR_API(urTensorMapEncodeTiledExp)
_UR_API(urUsmP2PEnablePeerAccessExp)
_UR_API(urUsmP2PDisablePeerAccessExp)
_UR_API(urUsmP2PPeerAccessGetInfoExp)
Expand Down
46 changes: 0 additions & 46 deletions include/ur_ddi.h
Original file line number Diff line number Diff line change
Expand Up @@ -1685,51 +1685,6 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable(
typedef ur_result_t(UR_APICALL *ur_pfnGetCommandBufferExpProcAddrTable_t)(
ur_api_version_t, ur_command_buffer_exp_dditable_t *);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urTensorMapEncodeIm2ColExp
/// @details The unnamed parameter types are listed in the same order as the
/// parameters of ::urTensorMapEncodeIm2ColExp: hDevice, TensorMapType,
/// TensorRank, GlobalAddress, GlobalDim, GlobalStrides, PixelBoxLowerCorner,
/// PixelBoxUpperCorner, ChannelsPerPixel, PixelsPerColumn, ElementStrides,
/// Interleave, Swizzle, L2Promotion, OobFill, hTensorMap.
typedef ur_result_t(UR_APICALL *ur_pfnTensorMapEncodeIm2ColExp_t)(
ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *,
const uint64_t *, const uint64_t *, const int *, const int *, uint32_t,
uint32_t, const uint32_t *, ur_exp_tensor_map_interleave_flags_t,
ur_exp_tensor_map_swizzle_flags_t, ur_exp_tensor_map_l2_promotion_flags_t,
ur_exp_tensor_map_oob_fill_flags_t, ur_exp_tensor_map_handle_t *);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urTensorMapEncodeTiledExp
/// @details The unnamed parameter types are listed in the same order as the
/// parameters of ::urTensorMapEncodeTiledExp: hDevice, TensorMapType,
/// TensorRank, GlobalAddress, GlobalDim, GlobalStrides, BoxDim,
/// ElementStrides, Interleave, Swizzle, L2Promotion, OobFill, hTensorMap.
typedef ur_result_t(UR_APICALL *ur_pfnTensorMapEncodeTiledExp_t)(
ur_device_handle_t, ur_exp_tensor_map_data_type_flags_t, uint32_t, void *,
const uint64_t *, const uint64_t *, const uint32_t *, const uint32_t *,
ur_exp_tensor_map_interleave_flags_t, ur_exp_tensor_map_swizzle_flags_t,
ur_exp_tensor_map_l2_promotion_flags_t, ur_exp_tensor_map_oob_fill_flags_t,
ur_exp_tensor_map_handle_t *);

///////////////////////////////////////////////////////////////////////////////
/// @brief Table of TensorMapExp functions pointers
/// @details Holds one function pointer per TensorMapExp entry point. This is
/// the table type filled in by ::urGetTensorMapExpProcAddrTable.
typedef struct ur_tensor_map_exp_dditable_t {
// Pointer with the signature of urTensorMapEncodeIm2ColExp.
ur_pfnTensorMapEncodeIm2ColExp_t pfnEncodeIm2ColExp;
// Pointer with the signature of urTensorMapEncodeTiledExp.
ur_pfnTensorMapEncodeTiledExp_t pfnEncodeTiledExp;
} ur_tensor_map_exp_dditable_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Exported function for filling application's TensorMapExp table
/// with current process' addresses
///
/// @details
/// - The table being filled is a ::ur_tensor_map_exp_dditable_t, which holds
///   the `pfnEncodeIm2ColExp` and `pfnEncodeTiledExp` function pointers.
///
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_UNINITIALIZED
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION
UR_DLLEXPORT ur_result_t UR_APICALL urGetTensorMapExpProcAddrTable(
/// [in] API version requested
ur_api_version_t version,
/// [in,out] pointer to table of DDI function pointers
ur_tensor_map_exp_dditable_t *pDdiTable);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urGetTensorMapExpProcAddrTable
/// @details Takes the requested API version and a pointer to the
/// ::ur_tensor_map_exp_dditable_t to fill, in that order, matching the
/// parameters of ::urGetTensorMapExpProcAddrTable.
typedef ur_result_t(UR_APICALL *ur_pfnGetTensorMapExpProcAddrTable_t)(
ur_api_version_t, ur_tensor_map_exp_dditable_t *);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urUsmP2PEnablePeerAccessExp
typedef ur_result_t(UR_APICALL *ur_pfnUsmP2PEnablePeerAccessExp_t)(
Expand Down Expand Up @@ -1948,7 +1903,6 @@ typedef struct ur_dditable_t {
ur_usm_dditable_t USM;
ur_usm_exp_dditable_t USMExp;
ur_command_buffer_exp_dditable_t CommandBufferExp;
ur_tensor_map_exp_dditable_t TensorMapExp;
ur_usm_p2p_exp_dditable_t UsmP2PExp;
ur_virtual_mem_dditable_t VirtualMem;
ur_device_dditable_t Device;
Expand Down
Loading

0 comments on commit 311c7db

Please sign in to comment.