From 407b3b77fefdfd64a3b435baf8ca07382c3b52f9 Mon Sep 17 00:00:00 2001 From: Jeff Jiang Date: Thu, 29 Aug 2024 17:45:04 -0400 Subject: [PATCH 01/10] * rocDecode: Removed the hard coded HW decoder capability info. - We now probe HW decoder capabilities through VA-API from the driver. --- api/rocdecode.h | 1 - src/rocdecode/roc_decoder_caps.h | 251 +++++++++++++++------ src/rocdecode/rocdecode_api.cpp | 2 +- src/rocdecode/vaapi/vaapi_videodecoder.cpp | 3 +- 4 files changed, 188 insertions(+), 69 deletions(-) diff --git a/api/rocdecode.h b/api/rocdecode.h index 39f30f2e..a8c98bd2 100644 --- a/api/rocdecode.h +++ b/api/rocdecode.h @@ -150,7 +150,6 @@ typedef struct _RocdecDecodeCaps { uint32_t bit_depth_minus_8; /**< IN: The Value "BitDepth minus 8" */ uint32_t reserved_1[3]; /**< Reserved for future use - set to zero */ uint8_t is_supported; /**< OUT: 1 if codec supported, 0 if not supported */ - uint8_t num_decoders; /**< OUT: Number of Decoders that can support IN params */ uint16_t output_format_mask; /**< OUT: each bit represents corresponding rocDecVideoSurfaceFormat enum */ uint32_t max_width; /**< OUT: Max supported coded width in pixels */ uint32_t max_height; /**< OUT: Max supported coded height in pixels */ diff --git a/src/rocdecode/roc_decoder_caps.h b/src/rocdecode/roc_decoder_caps.h index cce6c5a5..b7fcae61 100644 --- a/src/rocdecode/roc_decoder_caps.h +++ b/src/rocdecode/roc_decoder_caps.h @@ -29,11 +29,19 @@ THE SOFTWARE. 
#include "../commons.h" #include "../../api/rocdecode.h" +#define CHECK_VAAPI(call) {\ + VAStatus va_status = call;\ + if (va_status != VA_STATUS_SUCCESS) {\ + std::cout << "VAAPI failure: " << #call << " failed with status: " << std::hex << "0x" << va_status << std::dec << " = '" << vaErrorStr(va_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\ + return ROCDEC_RUNTIME_ERROR;\ + }\ +} // The CodecSpec struct contains information for an individual codec (e.g., rocDecVideoCodec_HEVC) struct CodecSpec { + rocDecVideoCodec codec_type; std::vector chroma_format; - std::vector bitdepth_minus8; + int max_bit_depth; uint16_t output_format_mask; uint32_t max_width; uint32_t max_height; @@ -41,12 +49,6 @@ struct CodecSpec { uint16_t min_height; }; -// The VcnCodecsSpec struct contains information for all supported codecs and number of vcn instances per device -struct VcnCodecsSpec { - std::unordered_map codecs_spec; - uint8_t num_decoders; -}; - // The RocDecVcnCodecSpec singleton class for providing access to the the vcn_spec_table class RocDecVcnCodecSpec { public: @@ -54,55 +56,187 @@ class RocDecVcnCodecSpec { static RocDecVcnCodecSpec instance; return instance; } - rocDecStatus GetDecoderCaps(std::string gcn_arch_name, RocdecDecodeCaps *pdc) { - std::lock_guard lock(mutex); - std::size_t pos = gcn_arch_name.find_first_of(":"); - std::string gcn_arch_name_base = (pos != std::string::npos) ? 
gcn_arch_name.substr(0, pos) : gcn_arch_name; - auto it = vcn_spec_table.find(gcn_arch_name_base); - if (it != vcn_spec_table.end()) { - const VcnCodecsSpec& vcn_spec = it->second; - auto it1 = vcn_spec.codecs_spec.find(pdc->codec_type); - if (it1 != vcn_spec.codecs_spec.end()) { - const CodecSpec& codec_spec = it1->second; - auto it_chroma_format = std::find(codec_spec.chroma_format.begin(), codec_spec.chroma_format.end(), pdc->chroma_format); - auto it_bitdepth_minus8 = std::find(codec_spec.bitdepth_minus8.begin(), codec_spec.bitdepth_minus8.end(), pdc->bit_depth_minus_8); - if (it_chroma_format != codec_spec.chroma_format.end() && it_bitdepth_minus8 != codec_spec.bitdepth_minus8.end()) { - pdc->is_supported = 1; - pdc->num_decoders = vcn_spec.num_decoders; - pdc->output_format_mask = codec_spec.output_format_mask; - pdc->max_width = codec_spec.max_width; - pdc->max_height = codec_spec.max_height; - pdc->min_width = codec_spec.min_width; - pdc->min_height = codec_spec.min_height; - return ROCDEC_SUCCESS; - } else { - return ROCDEC_NOT_SUPPORTED; + rocDecStatus ProbeHwDecodeCapabilities() { + std::string drm_node = "/dev/dri/renderD128"; // look at device_id 0 + int drm_fd = open(drm_node.c_str(), O_RDWR); + if (drm_fd < 0) { + ERR("Failed to open drm node." + drm_node); + return ROCDEC_DEVICE_INVALID; + } + VADisplay va_display = vaGetDisplayDRM(drm_fd); + if (!va_display) { + ERR("Failed to create va_display."); + return ROCDEC_DEVICE_INVALID; + } + int major_version = 0, minor_version = 0; + CHECK_VAAPI(vaInitialize(va_display, &major_version, &minor_version)); + + int num_profiles = 0; + std::vector profile_list; + num_profiles = vaMaxNumProfiles(va_display); + profile_list.resize(num_profiles); + CHECK_VAAPI(vaQueryConfigProfiles(va_display, profile_list.data(), &num_profiles)); + + // To simplify, merge all profile attributes into one codec type. 
+ rocDecVideoCodec codec_type; + rocDecVideoChromaFormat chroma_format; + int bit_depth; + for (int i = 0; i < num_profiles; i++) { + bool interested = false; + bit_depth = 8; + switch (profile_list[i]) { + case VAProfileH264Main: + case VAProfileH264High: + case VAProfileH264ConstrainedBaseline: + codec_type = rocDecVideoCodec_AVC; + chroma_format = rocDecVideoChromaFormat_420; + interested = true; + break; + + case VAProfileHEVCMain10: + bit_depth = 10; + case VAProfileHEVCMain: + codec_type = rocDecVideoCodec_HEVC; + chroma_format = rocDecVideoChromaFormat_420; + interested = true; + break; + + case VAProfileAV1Profile0: + codec_type = rocDecVideoCodec_AV1; + chroma_format = rocDecVideoChromaFormat_420; + bit_depth = 10; // both 8 and 10 bit + interested = true; + break; + + default: + break; + } + + if (interested) { + int j = 0; + for (j = 0; j < decode_cap_list_.size(); j++) { + if (decode_cap_list_[j].codec_type == codec_type) { + break; + } } + if (decode_cap_list_.size() == 0 || (decode_cap_list_.size() && j == decode_cap_list_.size())) { + decode_cap_list_.resize(decode_cap_list_.size() + 1, {}); + } + decode_cap_list_[j].codec_type = codec_type; + if (decode_cap_list_[j].max_bit_depth < bit_depth) { + decode_cap_list_[j].max_bit_depth = bit_depth; + } + auto it_chroma_format = std::find(decode_cap_list_[j].chroma_format.begin(), decode_cap_list_[j].chroma_format.end(), chroma_format); + if (it_chroma_format == decode_cap_list_[j].chroma_format.end()) { + decode_cap_list_[j].chroma_format.resize(decode_cap_list_[j].chroma_format.size() + 1); + decode_cap_list_[j].chroma_format[decode_cap_list_[j].chroma_format.size() - 1] = chroma_format; + } + + VAConfigAttrib va_config_attrib; + VAConfigID va_config_id; + unsigned int attr_count; + std::vector attr_list; + va_config_attrib.type = VAConfigAttribRTFormat; + CHECK_VAAPI(vaGetConfigAttributes(va_display, profile_list[i], VAEntrypointVLD, &va_config_attrib, 1)); + CHECK_VAAPI(vaCreateConfig(va_display, 
profile_list[i], VAEntrypointVLD, &va_config_attrib, 1, &va_config_id)); + CHECK_VAAPI(vaQuerySurfaceAttributes(va_display, va_config_id, 0, &attr_count)); + attr_list.resize(attr_count); + CHECK_VAAPI(vaQuerySurfaceAttributes(va_display, va_config_id, attr_list.data(), &attr_count)); + for (int k = 0; k < attr_count; k++) { + switch (attr_list[k].type) { + case VASurfaceAttribPixelFormat: + { + switch (attr_list[k].value.value.i) { + case VA_FOURCC_NV12: + decode_cap_list_[j].output_format_mask |= 1 << rocDecVideoSurfaceFormat_NV12; + break; + case VA_FOURCC_P016: + decode_cap_list_[j].output_format_mask |= 1 << rocDecVideoSurfaceFormat_P016; + break; + default: + break; + } + } + break; + case VASurfaceAttribMinWidth: + if (decode_cap_list_[j].min_width == 0 || (decode_cap_list_[j].min_width > 0 && decode_cap_list_[j].min_width > attr_list[k].value.value.i)) { + decode_cap_list_[j].min_width = attr_list[k].value.value.i; + } + break; + case VASurfaceAttribMinHeight: + if (decode_cap_list_[j].min_height == 0 || (decode_cap_list_[j].min_height > 0 && decode_cap_list_[j].min_height > attr_list[k].value.value.i)) { + decode_cap_list_[j].min_height = attr_list[k].value.value.i; + } + break; + case VASurfaceAttribMaxWidth: + if (decode_cap_list_[j].max_width < attr_list[k].value.value.i) { + decode_cap_list_[j].max_width = attr_list[k].value.value.i; + } + break; + case VASurfaceAttribMaxHeight: + if (decode_cap_list_[j].max_height < attr_list[k].value.value.i) { + decode_cap_list_[j].max_height = attr_list[k].value.value.i; + } + break; + default: + break; + } + } + } + } + + initialized_ = true; + return ROCDEC_SUCCESS; + } + rocDecStatus GetDecoderCaps(RocdecDecodeCaps *pdc) { + if (!initialized_) { + if (ProbeHwDecodeCapabilities() != ROCDEC_SUCCESS) { + ERR("Failed to obtain decoder capabilities from driver."); + return ROCDEC_DEVICE_INVALID; + } + } + std::lock_guard lock(mutex); + int i; + for (i = 0; i < decode_cap_list_.size(); i++) { + if 
(decode_cap_list_[i].codec_type == pdc->codec_type) { + break; + } + } + if (i < decode_cap_list_.size()) { + auto it_chroma_format = std::find(decode_cap_list_[i].chroma_format.begin(), decode_cap_list_[i].chroma_format.end(), pdc->chroma_format); + if (it_chroma_format != decode_cap_list_[i].chroma_format.end() && (pdc->bit_depth_minus_8 + 8) <= decode_cap_list_[i].max_bit_depth) { + pdc->is_supported = 1; + pdc->output_format_mask = decode_cap_list_[i].output_format_mask; + pdc->max_width = decode_cap_list_[i].max_width; + pdc->max_height = decode_cap_list_[i].max_height; + pdc->min_width = decode_cap_list_[i].min_width; + pdc->min_height = decode_cap_list_[i].min_height; + return ROCDEC_SUCCESS; } else { return ROCDEC_NOT_SUPPORTED; } } else { - ERR("Didn't find the decoder capability for " + gcn_arch_name + " GPU!"); - return ROCDEC_NOT_IMPLEMENTED; + return ROCDEC_NOT_SUPPORTED; } } - bool IsCodecConfigSupported(std::string gcn_arch_name, rocDecVideoCodec codec_type, rocDecVideoChromaFormat chroma_format, uint32_t bit_depth_minus8, rocDecVideoSurfaceFormat output_format) { + bool IsCodecConfigSupported(rocDecVideoCodec codec_type, rocDecVideoChromaFormat chroma_format, uint32_t bit_depth_minus8, rocDecVideoSurfaceFormat output_format) { + if (!initialized_) { + if (ProbeHwDecodeCapabilities() != ROCDEC_SUCCESS) { + ERR("Failed to obtain decoder capabilities from driver."); + return ROCDEC_DEVICE_INVALID; + } + } std::lock_guard lock(mutex); - std::size_t pos = gcn_arch_name.find_first_of(":"); - std::string gcn_arch_name_base = (pos != std::string::npos) ? 
gcn_arch_name.substr(0, pos) : gcn_arch_name; - auto it = vcn_spec_table.find(gcn_arch_name_base); - if (it != vcn_spec_table.end()) { - const VcnCodecsSpec& vcn_spec = it->second; - auto it1 = vcn_spec.codecs_spec.find(codec_type); - if (it1 != vcn_spec.codecs_spec.end()) { - const CodecSpec& codec_spec = it1->second; - auto it_chroma_format = std::find(codec_spec.chroma_format.begin(), codec_spec.chroma_format.end(), chroma_format); - auto it_bitdepth_minus8 = std::find(codec_spec.bitdepth_minus8.begin(), codec_spec.bitdepth_minus8.end(), bit_depth_minus8); - if (it_chroma_format != codec_spec.chroma_format.end() && it_bitdepth_minus8 != codec_spec.bitdepth_minus8.end()) { - return (codec_spec.output_format_mask & (static_cast(output_format) + 1)); - } else { - return false; - } + int i; + for (i = 0; i < decode_cap_list_.size(); i++) { + if (decode_cap_list_[i].codec_type == codec_type) { + break; + } + } + if (i < decode_cap_list_.size()) { + auto it_chroma_format = std::find(decode_cap_list_[i].chroma_format.begin(), decode_cap_list_[i].chroma_format.end(), chroma_format); + if (it_chroma_format != decode_cap_list_[i].chroma_format.end() && (bit_depth_minus8 + 8) <= decode_cap_list_[i].max_bit_depth) { + return decode_cap_list_[i].output_format_mask & 1 << (static_cast(output_format)); } else { return false; } @@ -111,26 +245,11 @@ class RocDecVcnCodecSpec { } } private: - std::unordered_map vcn_spec_table; + bool initialized_; + std::vector decode_cap_list_{0}; std::mutex mutex; RocDecVcnCodecSpec() { - //vcn lookup table format: - //{"gcn_arch_name1",{{{codec1, {{chroma_format1_for_codec1, chroma_format2_for_codec1, ...}, {bit_depth1_minus8_for_codec1, bit_depth2_minus8_for_codec1, ...}, output_format_mask_for_codec1, max_width_for_codec1, max_height_for_codec1, min_width_for_codec1, min_height_for_codec1}}, - // {codec2, {{chroma_format1_for_codec2, chroma_format2_for_codec2, ...}, {bit_depth1_minus8_for_codec2, bit_depth2_minus8_for_codec2, ...}, 
output_format_mask_for_codec2, max_width_for_codec2, max_height_for_codec2, min_width_for_codec2, min_height_for_codec2}}} - // , vcn_instances_for_gcn_arch_name1}}, - // av1 is available only on VCN3.0 and above - vcn_spec_table = { - {"gfx908",{{{rocDecVideoCodec_HEVC, {{rocDecVideoChromaFormat_420}, {0, 2}, 3, 7680, 4320, 64, 64}}, {rocDecVideoCodec_AVC, {{rocDecVideoChromaFormat_420}, {0}, 1, 4096, 2160, 64, 64}}}, 2}}, - {"gfx90a",{{{rocDecVideoCodec_HEVC, {{rocDecVideoChromaFormat_420}, {0, 2}, 3, 7680, 4320, 64, 64}}, {rocDecVideoCodec_AVC, {{rocDecVideoChromaFormat_420}, {0}, 1, 4096, 2160, 64, 64}}}, 2}}, - {"gfx940",{{{rocDecVideoCodec_HEVC, {{rocDecVideoChromaFormat_420}, {0, 2}, 3, 7680, 4320, 64, 64}}, {rocDecVideoCodec_AVC, {{rocDecVideoChromaFormat_420}, {0}, 1, 4096, 2176, 64, 64}}, {rocDecVideoCodec_AV1, {{rocDecVideoChromaFormat_420}, {0, 2}, 3, 8192, 4352, 64, 64}}}, 3}}, - {"gfx941",{{{rocDecVideoCodec_HEVC, {{rocDecVideoChromaFormat_420}, {0, 2}, 3, 7680, 4320, 64, 64}}, {rocDecVideoCodec_AVC, {{rocDecVideoChromaFormat_420}, {0}, 1, 4096, 2176, 64, 64}}, {rocDecVideoCodec_AV1, {{rocDecVideoChromaFormat_420}, {0, 2}, 3, 8192, 4352, 64, 64}}}, 4}}, - {"gfx942",{{{rocDecVideoCodec_HEVC, {{rocDecVideoChromaFormat_420}, {0, 2}, 3, 7680, 4320, 64, 64}}, {rocDecVideoCodec_AVC, {{rocDecVideoChromaFormat_420}, {0}, 1, 4096, 2176, 64, 64}}, {rocDecVideoCodec_AV1, {{rocDecVideoChromaFormat_420}, {0, 2}, 3, 8192, 4352, 64, 64}}}, 3}}, - {"gfx1030",{{{rocDecVideoCodec_HEVC, {{rocDecVideoChromaFormat_420}, {0, 2}, 3, 7680, 4320, 64, 64}}, {rocDecVideoCodec_AVC, {{rocDecVideoChromaFormat_420}, {0}, 1, 4096, 2176, 64, 64}}, {rocDecVideoCodec_AV1, {{rocDecVideoChromaFormat_420}, {0, 2}, 3, 8192, 4352, 64, 64}}}, 2}}, - {"gfx1031",{{{rocDecVideoCodec_HEVC, {{rocDecVideoChromaFormat_420}, {0, 2}, 3, 7680, 4320, 64, 64}}, {rocDecVideoCodec_AVC, {{rocDecVideoChromaFormat_420}, {0}, 1, 4096, 2176, 64, 64}}, {rocDecVideoCodec_AV1, {{rocDecVideoChromaFormat_420}, {0, 
2}, 3, 8192, 4352, 64, 64}}}, 2}}, - {"gfx1032",{{{rocDecVideoCodec_HEVC, {{rocDecVideoChromaFormat_420}, {0, 2}, 3, 7680, 4320, 64, 64}}, {rocDecVideoCodec_AVC, {{rocDecVideoChromaFormat_420}, {0}, 1, 4096, 2176, 64, 64}}, {rocDecVideoCodec_AV1, {{rocDecVideoChromaFormat_420}, {0, 2}, 3, 8192, 4352, 64, 64}}}, 2}}, - {"gfx1100",{{{rocDecVideoCodec_HEVC, {{rocDecVideoChromaFormat_420}, {0, 2}, 3, 7680, 4320, 64, 64}}, {rocDecVideoCodec_AVC, {{rocDecVideoChromaFormat_420}, {0}, 1, 4096, 2176, 64, 64}}, {rocDecVideoCodec_AV1, {{rocDecVideoChromaFormat_420}, {0, 2}, 3, 8192, 4352, 64, 64}}}, 2}}, - {"gfx1101",{{{rocDecVideoCodec_HEVC, {{rocDecVideoChromaFormat_420}, {0, 2}, 3, 7680, 4320, 64, 64}}, {rocDecVideoCodec_AVC, {{rocDecVideoChromaFormat_420}, {0}, 1, 4096, 2176, 64, 64}}, {rocDecVideoCodec_AV1, {{rocDecVideoChromaFormat_420}, {0, 2}, 3, 8192, 4352, 64, 64}}}, 1}}, - {"gfx1102",{{{rocDecVideoCodec_HEVC, {{rocDecVideoChromaFormat_420}, {0, 2}, 3, 7680, 4320, 64, 64}}, {rocDecVideoCodec_AVC, {{rocDecVideoChromaFormat_420}, {0}, 1, 4096, 2176, 64, 64}}, {rocDecVideoCodec_AV1, {{rocDecVideoChromaFormat_420}, {0, 2}, 3, 8192, 4352, 64, 64}}}, 2}},}; + initialized_ = false; } RocDecVcnCodecSpec(const RocDecVcnCodecSpec&) = delete; RocDecVcnCodecSpec& operator = (const RocDecVcnCodecSpec) = delete; diff --git a/src/rocdecode/rocdecode_api.cpp b/src/rocdecode/rocdecode_api.cpp index 5a18b4e4..4a1ff659 100644 --- a/src/rocdecode/rocdecode_api.cpp +++ b/src/rocdecode/rocdecode_api.cpp @@ -95,7 +95,7 @@ rocDecGetDecoderCaps(RocdecDecodeCaps *pdc) { } RocDecVcnCodecSpec& vcn_codec_spec = RocDecVcnCodecSpec::GetInstance(); - return vcn_codec_spec.GetDecoderCaps(hip_dev_prop.gcnArchName, pdc); + return vcn_codec_spec.GetDecoderCaps(pdc); } /*****************************************************************************************************/ diff --git a/src/rocdecode/vaapi/vaapi_videodecoder.cpp b/src/rocdecode/vaapi/vaapi_videodecoder.cpp index bdc62c0e..9450fbc7 100644 
--- a/src/rocdecode/vaapi/vaapi_videodecoder.cpp +++ b/src/rocdecode/vaapi/vaapi_videodecoder.cpp @@ -64,7 +64,7 @@ rocDecStatus VaapiVideoDecoder::InitializeDecoder(std::string device_name, std:: //Before initializing the VAAPI, first check to see if the requested codec config is supported RocDecVcnCodecSpec& vcn_codec_spec = RocDecVcnCodecSpec::GetInstance(); - if (!vcn_codec_spec.IsCodecConfigSupported(gcn_arch_name, decoder_create_info_.codec_type, decoder_create_info_.chroma_format, + if (!vcn_codec_spec.IsCodecConfigSupported(decoder_create_info_.codec_type, decoder_create_info_.chroma_format, decoder_create_info_.bit_depth_minus_8, decoder_create_info_.output_format)) { ERR("The codec config combination is not supported."); return ROCDEC_NOT_SUPPORTED; @@ -166,6 +166,7 @@ rocDecStatus VaapiVideoDecoder::CreateDecoderConfig() { va_config_attrib_.type = VAConfigAttribRTFormat; CHECK_VAAPI(vaGetConfigAttributes(va_display_, va_profile_, VAEntrypointVLD, &va_config_attrib_, 1)); CHECK_VAAPI(vaCreateConfig(va_display_, va_profile_, VAEntrypointVLD, &va_config_attrib_, 1, &va_config_id_)); + return ROCDEC_SUCCESS; } From ce04289957e67a0239cb319cfd23f97abc9d947e Mon Sep 17 00:00:00 2001 From: Jeff Jiang Date: Thu, 19 Sep 2024 08:38:22 -0400 Subject: [PATCH 02/10] * rocDecode: Added number of decoder prob and a few missing tear down calls. 
--- api/rocdecode.h | 1 + src/rocdecode/roc_decoder_caps.h | 24 ++++++++++++++++++++---- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/api/rocdecode.h b/api/rocdecode.h index a8c98bd2..39f30f2e 100644 --- a/api/rocdecode.h +++ b/api/rocdecode.h @@ -150,6 +150,7 @@ typedef struct _RocdecDecodeCaps { uint32_t bit_depth_minus_8; /**< IN: The Value "BitDepth minus 8" */ uint32_t reserved_1[3]; /**< Reserved for future use - set to zero */ uint8_t is_supported; /**< OUT: 1 if codec supported, 0 if not supported */ + uint8_t num_decoders; /**< OUT: Number of Decoders that can support IN params */ uint16_t output_format_mask; /**< OUT: each bit represents corresponding rocDecVideoSurfaceFormat enum */ uint32_t max_width; /**< OUT: Max supported coded width in pixels */ uint32_t max_height; /**< OUT: Max supported coded height in pixels */ diff --git a/src/rocdecode/roc_decoder_caps.h b/src/rocdecode/roc_decoder_caps.h index b7fcae61..c3cc432e 100644 --- a/src/rocdecode/roc_decoder_caps.h +++ b/src/rocdecode/roc_decoder_caps.h @@ -26,6 +26,8 @@ THE SOFTWARE. #include #include #include +#include +#include #include "../commons.h" #include "../../api/rocdecode.h" @@ -63,13 +65,24 @@ class RocDecVcnCodecSpec { ERR("Failed to open drm node." 
+ drm_node); return ROCDEC_DEVICE_INVALID; } + amdgpu_device_handle dev_handle; + uint32_t major_version = 0, minor_version = 0; + if (amdgpu_device_initialize(drm_fd, &major_version, &minor_version, &dev_handle)) { + ERR("GPU device initialization failed: " + drm_node); + return ROCDEC_DEVICE_INVALID; + } + if (amdgpu_query_hw_ip_count(dev_handle, AMDGPU_HW_IP_VCN_ENC, &num_codec_engines_)) { + ERR("Failed to get the number of video codec engines."); + } + VADisplay va_display = vaGetDisplayDRM(drm_fd); if (!va_display) { ERR("Failed to create va_display."); return ROCDEC_DEVICE_INVALID; } - int major_version = 0, minor_version = 0; - CHECK_VAAPI(vaInitialize(va_display, &major_version, &minor_version)); + vaSetInfoCallback(va_display, NULL, NULL); + int va_major_version = 0, va_minor_version = 0; + CHECK_VAAPI(vaInitialize(va_display, &va_major_version, &va_minor_version)); int num_profiles = 0; std::vector profile_list; @@ -144,8 +157,7 @@ class RocDecVcnCodecSpec { CHECK_VAAPI(vaQuerySurfaceAttributes(va_display, va_config_id, attr_list.data(), &attr_count)); for (int k = 0; k < attr_count; k++) { switch (attr_list[k].type) { - case VASurfaceAttribPixelFormat: - { + case VASurfaceAttribPixelFormat: { switch (attr_list[k].value.value.i) { case VA_FOURCC_NV12: decode_cap_list_[j].output_format_mask |= 1 << rocDecVideoSurfaceFormat_NV12; @@ -182,8 +194,11 @@ class RocDecVcnCodecSpec { break; } } + CHECK_VAAPI(vaDestroyConfig(va_display, va_config_id)); } } + CHECK_VAAPI(vaTerminate(va_display)); + close(drm_fd); initialized_ = true; return ROCDEC_SUCCESS; @@ -246,6 +261,7 @@ class RocDecVcnCodecSpec { } private: bool initialized_; + uint32_t num_codec_engines_ = 1; std::vector decode_cap_list_{0}; std::mutex mutex; RocDecVcnCodecSpec() { From f999c80af3e331c096ffa32c38cd416b04d6c301 Mon Sep 17 00:00:00 2001 From: Jeff Jiang Date: Mon, 16 Dec 2024 13:30:09 -0500 Subject: [PATCH 03/10] * rocDecode/HW cap change: Added a new singleton class, GpuVaContext, to handle 
HIP and VA initialization and VA attributes probe for both HW capability check and decoder initialization. --- src/rocdecode/roc_decoder.cpp | 18 - src/rocdecode/roc_decoder.h | 9 - src/rocdecode/roc_decoder_caps.h | 237 +---------- src/rocdecode/rocdecode_api.cpp | 30 +- src/rocdecode/vaapi/vaapi_videodecoder.cpp | 174 +------- src/rocdecode/vaapi/vaapi_videodecoder.h | 461 ++++++++++++++++++++- utils/rocvideodecode/roc_video_dec.cpp | 7 +- 7 files changed, 503 insertions(+), 433 deletions(-) diff --git a/src/rocdecode/roc_decoder.cpp b/src/rocdecode/roc_decoder.cpp index 53b5910d..9951a30b 100644 --- a/src/rocdecode/roc_decoder.cpp +++ b/src/rocdecode/roc_decoder.cpp @@ -45,11 +45,6 @@ RocDecoder::RocDecoder(RocDecoderCreateInfo& decoder_create_info): va_video_deco rocDecStatus RocDecoder::InitializeDecoder() { rocDecStatus rocdec_status = ROCDEC_SUCCESS; - rocdec_status = InitHIP(decoder_create_info_.device_id); - if (rocdec_status != ROCDEC_SUCCESS) { - ERR("Failed to initilize the HIP."); - return rocdec_status; - } if (decoder_create_info_.num_decode_surfaces < 1) { ERR("Invalid number of decode surfaces."); return ROCDEC_INVALID_PARAMETER; @@ -186,16 +181,3 @@ rocDecStatus RocDecoder::FreeVideoFrame(int pic_idx) { return ROCDEC_SUCCESS; } - - -rocDecStatus RocDecoder::InitHIP(int device_id) { - CHECK_HIP(hipGetDeviceCount(&num_devices_)); - if (num_devices_ < 1) { - ERR("Didn't find any GPU."); - return ROCDEC_DEVICE_INVALID; - } - CHECK_HIP(hipSetDevice(device_id)); - CHECK_HIP(hipGetDeviceProperties(&hip_dev_prop_, device_id)); - - return ROCDEC_SUCCESS; -} diff --git a/src/rocdecode/roc_decoder.h b/src/rocdecode/roc_decoder.h index f4b1ebc4..bdb62c08 100644 --- a/src/rocdecode/roc_decoder.h +++ b/src/rocdecode/roc_decoder.h @@ -34,14 +34,6 @@ THE SOFTWARE. 
#include #include "vaapi/vaapi_videodecoder.h" -#define CHECK_HIP(call) {\ - hipError_t hip_status = call;\ - if (hip_status != hipSuccess) {\ - std::cout << "HIP failure: " << #call << " failed with 'status: " << hipGetErrorName(hip_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\ - return ROCDEC_RUNTIME_ERROR;\ - }\ -} - struct HipInteropDeviceMem { hipExternalMemory_t hip_ext_mem; // Interface to the vaapi-hip interop uint8_t* hip_mapped_device_mem; // Mapped device memory for the YUV plane @@ -63,7 +55,6 @@ class RocDecoder { rocDecStatus GetVideoFrame(int pic_idx, void *dev_mem_ptr[3], uint32_t horizontal_pitch[3], RocdecProcParams *vid_postproc_params); private: - rocDecStatus InitHIP(int device_id); rocDecStatus FreeVideoFrame(int pic_idx); int num_devices_; RocDecoderCreateInfo decoder_create_info_; diff --git a/src/rocdecode/roc_decoder_caps.h b/src/rocdecode/roc_decoder_caps.h index c3cc432e..49181c5d 100644 --- a/src/rocdecode/roc_decoder_caps.h +++ b/src/rocdecode/roc_decoder_caps.h @@ -26,30 +26,9 @@ THE SOFTWARE. 
#include #include #include -#include -#include #include "../commons.h" #include "../../api/rocdecode.h" - -#define CHECK_VAAPI(call) {\ - VAStatus va_status = call;\ - if (va_status != VA_STATUS_SUCCESS) {\ - std::cout << "VAAPI failure: " << #call << " failed with status: " << std::hex << "0x" << va_status << std::dec << " = '" << vaErrorStr(va_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\ - return ROCDEC_RUNTIME_ERROR;\ - }\ -} - -// The CodecSpec struct contains information for an individual codec (e.g., rocDecVideoCodec_HEVC) -struct CodecSpec { - rocDecVideoCodec codec_type; - std::vector chroma_format; - int max_bit_depth; - uint16_t output_format_mask; - uint32_t max_width; - uint32_t max_height; - uint16_t min_width; - uint16_t min_height; -}; +#include "vaapi_videodecoder.h" // The RocDecVcnCodecSpec singleton class for providing access to the the vcn_spec_table class RocDecVcnCodecSpec { @@ -58,211 +37,33 @@ class RocDecVcnCodecSpec { static RocDecVcnCodecSpec instance; return instance; } - rocDecStatus ProbeHwDecodeCapabilities() { - std::string drm_node = "/dev/dri/renderD128"; // look at device_id 0 - int drm_fd = open(drm_node.c_str(), O_RDWR); - if (drm_fd < 0) { - ERR("Failed to open drm node." 
+ drm_node); - return ROCDEC_DEVICE_INVALID; - } - amdgpu_device_handle dev_handle; - uint32_t major_version = 0, minor_version = 0; - if (amdgpu_device_initialize(drm_fd, &major_version, &minor_version, &dev_handle)) { - ERR("GPU device initialization failed: " + drm_node); - return ROCDEC_DEVICE_INVALID; - } - if (amdgpu_query_hw_ip_count(dev_handle, AMDGPU_HW_IP_VCN_ENC, &num_codec_engines_)) { - ERR("Failed to get the number of video codec engines."); - } - - VADisplay va_display = vaGetDisplayDRM(drm_fd); - if (!va_display) { - ERR("Failed to create va_display."); - return ROCDEC_DEVICE_INVALID; - } - vaSetInfoCallback(va_display, NULL, NULL); - int va_major_version = 0, va_minor_version = 0; - CHECK_VAAPI(vaInitialize(va_display, &va_major_version, &va_minor_version)); - - int num_profiles = 0; - std::vector profile_list; - num_profiles = vaMaxNumProfiles(va_display); - profile_list.resize(num_profiles); - CHECK_VAAPI(vaQueryConfigProfiles(va_display, profile_list.data(), &num_profiles)); - - // To simplify, merge all profile attributes into one codec type. 
- rocDecVideoCodec codec_type; - rocDecVideoChromaFormat chroma_format; - int bit_depth; - for (int i = 0; i < num_profiles; i++) { - bool interested = false; - bit_depth = 8; - switch (profile_list[i]) { - case VAProfileH264Main: - case VAProfileH264High: - case VAProfileH264ConstrainedBaseline: - codec_type = rocDecVideoCodec_AVC; - chroma_format = rocDecVideoChromaFormat_420; - interested = true; - break; - - case VAProfileHEVCMain10: - bit_depth = 10; - case VAProfileHEVCMain: - codec_type = rocDecVideoCodec_HEVC; - chroma_format = rocDecVideoChromaFormat_420; - interested = true; - break; - - case VAProfileAV1Profile0: - codec_type = rocDecVideoCodec_AV1; - chroma_format = rocDecVideoChromaFormat_420; - bit_depth = 10; // both 8 and 10 bit - interested = true; - break; - - default: - break; - } - - if (interested) { - int j = 0; - for (j = 0; j < decode_cap_list_.size(); j++) { - if (decode_cap_list_[j].codec_type == codec_type) { - break; - } - } - if (decode_cap_list_.size() == 0 || (decode_cap_list_.size() && j == decode_cap_list_.size())) { - decode_cap_list_.resize(decode_cap_list_.size() + 1, {}); - } - decode_cap_list_[j].codec_type = codec_type; - if (decode_cap_list_[j].max_bit_depth < bit_depth) { - decode_cap_list_[j].max_bit_depth = bit_depth; - } - auto it_chroma_format = std::find(decode_cap_list_[j].chroma_format.begin(), decode_cap_list_[j].chroma_format.end(), chroma_format); - if (it_chroma_format == decode_cap_list_[j].chroma_format.end()) { - decode_cap_list_[j].chroma_format.resize(decode_cap_list_[j].chroma_format.size() + 1); - decode_cap_list_[j].chroma_format[decode_cap_list_[j].chroma_format.size() - 1] = chroma_format; - } - - VAConfigAttrib va_config_attrib; - VAConfigID va_config_id; - unsigned int attr_count; - std::vector attr_list; - va_config_attrib.type = VAConfigAttribRTFormat; - CHECK_VAAPI(vaGetConfigAttributes(va_display, profile_list[i], VAEntrypointVLD, &va_config_attrib, 1)); - CHECK_VAAPI(vaCreateConfig(va_display, 
profile_list[i], VAEntrypointVLD, &va_config_attrib, 1, &va_config_id)); - CHECK_VAAPI(vaQuerySurfaceAttributes(va_display, va_config_id, 0, &attr_count)); - attr_list.resize(attr_count); - CHECK_VAAPI(vaQuerySurfaceAttributes(va_display, va_config_id, attr_list.data(), &attr_count)); - for (int k = 0; k < attr_count; k++) { - switch (attr_list[k].type) { - case VASurfaceAttribPixelFormat: { - switch (attr_list[k].value.value.i) { - case VA_FOURCC_NV12: - decode_cap_list_[j].output_format_mask |= 1 << rocDecVideoSurfaceFormat_NV12; - break; - case VA_FOURCC_P016: - decode_cap_list_[j].output_format_mask |= 1 << rocDecVideoSurfaceFormat_P016; - break; - default: - break; - } - } - break; - case VASurfaceAttribMinWidth: - if (decode_cap_list_[j].min_width == 0 || (decode_cap_list_[j].min_width > 0 && decode_cap_list_[j].min_width > attr_list[k].value.value.i)) { - decode_cap_list_[j].min_width = attr_list[k].value.value.i; - } - break; - case VASurfaceAttribMinHeight: - if (decode_cap_list_[j].min_height == 0 || (decode_cap_list_[j].min_height > 0 && decode_cap_list_[j].min_height > attr_list[k].value.value.i)) { - decode_cap_list_[j].min_height = attr_list[k].value.value.i; - } - break; - case VASurfaceAttribMaxWidth: - if (decode_cap_list_[j].max_width < attr_list[k].value.value.i) { - decode_cap_list_[j].max_width = attr_list[k].value.value.i; - } - break; - case VASurfaceAttribMaxHeight: - if (decode_cap_list_[j].max_height < attr_list[k].value.value.i) { - decode_cap_list_[j].max_height = attr_list[k].value.value.i; - } - break; - default: - break; - } - } - CHECK_VAAPI(vaDestroyConfig(va_display, va_config_id)); - } - } - CHECK_VAAPI(vaTerminate(va_display)); - close(drm_fd); - - initialized_ = true; - return ROCDEC_SUCCESS; - } rocDecStatus GetDecoderCaps(RocdecDecodeCaps *pdc) { - if (!initialized_) { - if (ProbeHwDecodeCapabilities() != ROCDEC_SUCCESS) { - ERR("Failed to obtain decoder capabilities from driver."); - return ROCDEC_DEVICE_INVALID; - } - } - 
std::lock_guard lock(mutex); - int i; - for (i = 0; i < decode_cap_list_.size(); i++) { - if (decode_cap_list_[i].codec_type == pdc->codec_type) { - break; - } - } - if (i < decode_cap_list_.size()) { - auto it_chroma_format = std::find(decode_cap_list_[i].chroma_format.begin(), decode_cap_list_[i].chroma_format.end(), pdc->chroma_format); - if (it_chroma_format != decode_cap_list_[i].chroma_format.end() && (pdc->bit_depth_minus_8 + 8) <= decode_cap_list_[i].max_bit_depth) { - pdc->is_supported = 1; - pdc->output_format_mask = decode_cap_list_[i].output_format_mask; - pdc->max_width = decode_cap_list_[i].max_width; - pdc->max_height = decode_cap_list_[i].max_height; - pdc->min_width = decode_cap_list_[i].min_width; - pdc->min_height = decode_cap_list_[i].min_height; - return ROCDEC_SUCCESS; - } else { - return ROCDEC_NOT_SUPPORTED; - } + // Jefftest + GpuVaContext& va_ctx = GpuVaContext::GetInstance(); + va_ctx.Initialize(pdc->device_id); + if (va_ctx.CheckDecCapForCodecType(pdc) != ROCDEC_SUCCESS) { + ERR("Failed to obtain decoder capabilities from driver."); + return ROCDEC_DEVICE_INVALID; } else { - return ROCDEC_NOT_SUPPORTED; + return ROCDEC_SUCCESS; } } - bool IsCodecConfigSupported(rocDecVideoCodec codec_type, rocDecVideoChromaFormat chroma_format, uint32_t bit_depth_minus8, rocDecVideoSurfaceFormat output_format) { - if (!initialized_) { - if (ProbeHwDecodeCapabilities() != ROCDEC_SUCCESS) { - ERR("Failed to obtain decoder capabilities from driver."); - return ROCDEC_DEVICE_INVALID; - } - } - std::lock_guard lock(mutex); - int i; - for (i = 0; i < decode_cap_list_.size(); i++) { - if (decode_cap_list_[i].codec_type == codec_type) { - break; - } - } - if (i < decode_cap_list_.size()) { - auto it_chroma_format = std::find(decode_cap_list_[i].chroma_format.begin(), decode_cap_list_[i].chroma_format.end(), chroma_format); - if (it_chroma_format != decode_cap_list_[i].chroma_format.end() && (bit_depth_minus8 + 8) <= decode_cap_list_[i].max_bit_depth) { - return 
decode_cap_list_[i].output_format_mask & 1 << (static_cast(output_format)); - } else { - return false; - } - } else { + bool IsCodecConfigSupported(int device_id, rocDecVideoCodec codec_type, rocDecVideoChromaFormat chroma_format, uint32_t bit_depth_minus8, rocDecVideoSurfaceFormat output_format) { + RocdecDecodeCaps decode_caps; + decode_caps.device_id = device_id; + decode_caps.codec_type = codec_type; + decode_caps.chroma_format = chroma_format; + decode_caps.bit_depth_minus_8 = bit_depth_minus8; + if((rocDecGetDecoderCaps(&decode_caps) != ROCDEC_SUCCESS) || (decode_caps.is_supported == false) || ((decode_caps.output_format_mask & (1 << output_format)) == 0)) { return false; + } else { + return true; } } private: bool initialized_; - uint32_t num_codec_engines_ = 1; - std::vector decode_cap_list_{0}; + uint32_t num_dec_engines_ = 1; + // Jefftest std::vector decode_cap_list_{0}; std::mutex mutex; RocDecVcnCodecSpec() { initialized_ = false; diff --git a/src/rocdecode/rocdecode_api.cpp b/src/rocdecode/rocdecode_api.cpp index ec8959f6..4af07f8c 100644 --- a/src/rocdecode/rocdecode_api.cpp +++ b/src/rocdecode/rocdecode_api.cpp @@ -21,7 +21,7 @@ THE SOFTWARE. */ #include "dec_handle.h" #include "rocdecode.h" -#include "roc_decoder_caps.h" +#include "vaapi_videodecoder.h" #include "../commons.h" namespace rocdecode { @@ -72,30 +72,14 @@ rocDecGetDecoderCaps(RocdecDecodeCaps *pdc) { if (pdc == nullptr) { return ROCDEC_INVALID_PARAMETER; } - hipError_t hip_status = hipSuccess; - int num_devices = 0; - hipDeviceProp_t hip_dev_prop; - hip_status = hipGetDeviceCount(&num_devices); - if (hip_status != hipSuccess) { - ERR("ERROR: hipGetDeviceCount failed!" 
+ TOSTR(hip_status)); + GpuVaContext& va_ctx = GpuVaContext::GetInstance(); + va_ctx.Initialize(pdc->device_id); + if (va_ctx.CheckDecCapForCodecType(pdc) != ROCDEC_SUCCESS) { + ERR("Failed to obtain decoder capabilities from driver."); return ROCDEC_DEVICE_INVALID; + } else { + return ROCDEC_SUCCESS; } - if (num_devices < 1) { - ERR("ERROR: didn't find any GPU!"); - return ROCDEC_DEVICE_INVALID; - } - if (pdc->device_id >= num_devices) { - ERR("ERROR: the requested device_id is not found! "); - return ROCDEC_DEVICE_INVALID; - } - hip_status = hipGetDeviceProperties(&hip_dev_prop, pdc->device_id); - if (hip_status != hipSuccess) { - ERR("ERROR: hipGetDeviceProperties for device (" +TOSTR(pdc->device_id) + " ) failed! (" + TOSTR(hip_status) + ")" ); - return ROCDEC_DEVICE_INVALID; - } - - RocDecVcnCodecSpec& vcn_codec_spec = RocDecVcnCodecSpec::GetInstance(); - return vcn_codec_spec.GetDecoderCaps(pdc); } /*****************************************************************************************************/ diff --git a/src/rocdecode/vaapi/vaapi_videodecoder.cpp b/src/rocdecode/vaapi/vaapi_videodecoder.cpp index 9e9563ca..3e867474 100644 --- a/src/rocdecode/vaapi/vaapi_videodecoder.cpp +++ b/src/rocdecode/vaapi/vaapi_videodecoder.cpp @@ -59,50 +59,33 @@ VaapiVideoDecoder::~VaapiVideoDecoder() { } } +bool VaapiVideoDecoder::IsCodecConfigSupported(int device_id, rocDecVideoCodec codec_type, rocDecVideoChromaFormat chroma_format, uint32_t bit_depth_minus8, rocDecVideoSurfaceFormat output_format) { + RocdecDecodeCaps decode_caps; + decode_caps.device_id = device_id; + decode_caps.codec_type = codec_type; + decode_caps.chroma_format = chroma_format; + decode_caps.bit_depth_minus_8 = bit_depth_minus8; + if((rocDecGetDecoderCaps(&decode_caps) != ROCDEC_SUCCESS) || (decode_caps.is_supported == false) || ((decode_caps.output_format_mask & (1 << output_format)) == 0)) { + return false; + } else { + return true; + } +} + rocDecStatus 
VaapiVideoDecoder::InitializeDecoder(std::string device_name, std::string gcn_arch_name) { rocDecStatus rocdec_status = ROCDEC_SUCCESS; - //Before initializing the VAAPI, first check to see if the requested codec config is supported - RocDecVcnCodecSpec& vcn_codec_spec = RocDecVcnCodecSpec::GetInstance(); - if (!vcn_codec_spec.IsCodecConfigSupported(decoder_create_info_.codec_type, decoder_create_info_.chroma_format, + // Before initializing the VAAPI, first check to see if the requested codec config is supported + if (!IsCodecConfigSupported(decoder_create_info_.device_id, decoder_create_info_.codec_type, decoder_create_info_.chroma_format, decoder_create_info_.bit_depth_minus_8, decoder_create_info_.output_format)) { ERR("The codec config combination is not supported."); return ROCDEC_NOT_SUPPORTED; } - std::size_t pos = gcn_arch_name.find_first_of(":"); - std::string gcn_arch_name_base = (pos != std::string::npos) ? gcn_arch_name.substr(0, pos) : gcn_arch_name; - - std::vector visible_devices; - GetVisibleDevices(visible_devices); - - int offset = 0; - if (gcn_arch_name_base.compare("gfx942") == 0) { - std::vector current_compute_partitions; - GetCurrentComputePartition(current_compute_partitions); - if (current_compute_partitions.empty()) { - //if the current_compute_partitions is empty then the default SPX mode is assumed. 
- if (decoder_create_info_.device_id < visible_devices.size()) { - offset = visible_devices[decoder_create_info_.device_id] * 7; - } else { - offset = decoder_create_info_.device_id * 7; - } - } else { - GetDrmNodeOffset(device_name, decoder_create_info_.device_id, visible_devices, current_compute_partitions, offset); - } - } - - std::string drm_node = "/dev/dri/renderD"; - if (decoder_create_info_.device_id < visible_devices.size()) { - drm_node += std::to_string(128 + offset + visible_devices[decoder_create_info_.device_id]); - } else { - drm_node += std::to_string(128 + offset + decoder_create_info_.device_id); - } - rocdec_status = InitVAAPI(drm_node); - if (rocdec_status != ROCDEC_SUCCESS) { - ERR("Failed to initilize the VAAPI."); - return rocdec_status; - } + // Jefftest + GpuVaContext& va_ctx = GpuVaContext::GetInstance(); + va_ctx.Initialize(decoder_create_info_.device_id); + va_display_ = va_ctx.va_display_; rocdec_status = CreateDecoderConfig(); if (rocdec_status != ROCDEC_SUCCESS) { ERR("Failed to create a VAAPI decoder configuration."); @@ -121,23 +104,6 @@ rocDecStatus VaapiVideoDecoder::InitializeDecoder(std::string device_name, std:: return rocdec_status; } -rocDecStatus VaapiVideoDecoder::InitVAAPI(std::string drm_node) { - drm_fd_ = open(drm_node.c_str(), O_RDWR); - if (drm_fd_ < 0) { - ERR("Failed to open drm node." 
+ drm_node); - return ROCDEC_NOT_INITIALIZED; - } - va_display_ = vaGetDisplayDRM(drm_fd_); - if (!va_display_) { - ERR("Failed to create va_display."); - return ROCDEC_NOT_INITIALIZED; - } - vaSetInfoCallback(va_display_, NULL, NULL); - int major_version = 0, minor_version = 0; - CHECK_VAAPI(vaInitialize(va_display_, &major_version, &minor_version)); - return ROCDEC_SUCCESS; -} - rocDecStatus VaapiVideoDecoder::CreateDecoderConfig() { switch (decoder_create_info_.codec_type) { case rocDecVideoCodec_HEVC: @@ -511,106 +477,4 @@ rocDecStatus VaapiVideoDecoder::SyncSurface(int pic_idx) { CHECK_VAAPI(vaSyncSurface(va_display_, va_surface_ids_[pic_idx])); } return ROCDEC_SUCCESS; -} - -void VaapiVideoDecoder::GetVisibleDevices(std::vector& visible_devices_vetor) { - char *visible_devices = std::getenv("HIP_VISIBLE_DEVICES"); - if (visible_devices != nullptr) { - char *token = std::strtok(visible_devices,","); - while (token != nullptr) { - visible_devices_vetor.push_back(std::atoi(token)); - token = std::strtok(nullptr,","); - } - std::sort(visible_devices_vetor.begin(), visible_devices_vetor.end()); - } -} - -void VaapiVideoDecoder::GetCurrentComputePartition(std::vector ¤t_compute_partitions) { - std::string search_path = "/sys/devices/"; - std::string partition_file = "current_compute_partition"; - std::error_code ec; - if (fs::exists(search_path)) { - for (auto it = fs::recursive_directory_iterator(search_path, fs::directory_options::skip_permission_denied); it != fs::recursive_directory_iterator(); ) { - try { - if (it->path().filename() == partition_file) { - std::ifstream file(it->path()); - if (file.is_open()) { - std::string partition; - std::getline(file, partition); - if (partition.compare("SPX") == 0 || partition.compare("spx") == 0) { - current_compute_partitions.push_back(kSpx); - } else if (partition.compare("DPX") == 0 || partition.compare("dpx") == 0) { - current_compute_partitions.push_back(kDpx); - } else if (partition.compare("TPX") == 0 || 
partition.compare("tpx") == 0) { - current_compute_partitions.push_back(kTpx); - } else if (partition.compare("QPX") == 0 || partition.compare("qpx") == 0) { - current_compute_partitions.push_back(kQpx); - } else if (partition.compare("CPX") == 0 || partition.compare("cpx") == 0) { - current_compute_partitions.push_back(kCpx); - } - file.close(); - } - } - ++it; - } catch (fs::filesystem_error& e) { - it.increment(ec); - } - } - } -} - -void VaapiVideoDecoder::GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector& visible_devices, - std::vector ¤t_compute_partitions, int &offset) { - - if (!current_compute_partitions.empty()) { - switch (current_compute_partitions[0]) { - case kSpx: - if (device_id < visible_devices.size()) { - offset = visible_devices[device_id] * 7; - } else { - offset = device_id * 7; - } - break; - case kDpx: - if (device_id < visible_devices.size()) { - offset = (visible_devices[device_id] / 2) * 6; - } else { - offset = (device_id / 2) * 6; - } - break; - case kTpx: - // Please note that although there are only 6 XCCs per socket on MI300A, - // there are two dummy render nodes added by the driver. - // This needs to be taken into account when creating drm_node on each socket in TPX mode. - if (device_id < visible_devices.size()) { - offset = (visible_devices[device_id] / 3) * 5; - } else { - offset = (device_id / 3) * 5; - } - break; - case kQpx: - if (device_id < visible_devices.size()) { - offset = (visible_devices[device_id] / 4) * 4; - } else { - offset = (device_id / 4) * 4; - } - break; - case kCpx: - // Please note that both MI300A and MI300X have the same gfx_arch_name which is - // gfx942. Therefore we cannot use the gfx942 to identify MI300A. - // instead use the device name and look for MI300A - // Also, as explained aboe in the TPX mode section, we need to be taken into account - // the extra two dummy nodes when creating drm_node on each socket in CPX mode as well. 
- std::string mi300a = "MI300A"; - size_t found_mi300a = device_name.find(mi300a); - if (found_mi300a != std::string::npos) { - if (device_id < visible_devices.size()) { - offset = (visible_devices[device_id] / 6) * 2; - } else { - offset = (device_id / 6) * 2; - } - } - break; - } - } } \ No newline at end of file diff --git a/src/rocdecode/vaapi/vaapi_videodecoder.h b/src/rocdecode/vaapi/vaapi_videodecoder.h index 201bd6d0..0604172f 100644 --- a/src/rocdecode/vaapi/vaapi_videodecoder.h +++ b/src/rocdecode/vaapi/vaapi_videodecoder.h @@ -29,6 +29,8 @@ THE SOFTWARE. #include #include #include +#include +#include #if __cplusplus >= 201703L && __has_include() #include namespace fs = std::filesystem; @@ -36,13 +38,22 @@ THE SOFTWARE. #include namespace fs = std::experimental::filesystem; #endif +#include +#include #include #include #include -#include "../roc_decoder_caps.h" #include "../../commons.h" #include "../../../api/rocdecode.h" +#define CHECK_HIP(call) {\ + hipError_t hip_status = call;\ + if (hip_status != hipSuccess) {\ + std::cout << "HIP failure: " << #call << " failed with 'status: " << hipGetErrorName(hip_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\ + return ROCDEC_RUNTIME_ERROR;\ + }\ +} + #define CHECK_VAAPI(call) {\ VAStatus va_status = call;\ if (va_status != VA_STATUS_SUCCESS) {\ @@ -71,13 +82,14 @@ class VaapiVideoDecoder { rocDecStatus ExportSurface(int pic_idx, VADRMPRIMESurfaceDescriptor &va_drm_prime_surface_desc); rocDecStatus SyncSurface(int pic_idx); rocDecStatus ReconfigureDecoder(RocdecReconfigureDecoderInfo *reconfig_params); + private: RocDecoderCreateInfo decoder_create_info_; int drm_fd_; VADisplay va_display_; + VAProfile va_profile_; VAConfigAttrib va_config_attrib_; VAConfigID va_config_id_; - VAProfile va_profile_; VAContextID va_context_id_; std::vector va_surface_ids_; @@ -88,13 +100,448 @@ class VaapiVideoDecoder { VABufferID slice_data_buf_id_; uint32_t slice_data_buf_size_; - rocDecStatus 
InitVAAPI(std::string drm_node); + bool IsCodecConfigSupported(int device_id, rocDecVideoCodec codec_type, rocDecVideoChromaFormat chroma_format, uint32_t bit_depth_minus8, rocDecVideoSurfaceFormat output_format); rocDecStatus CreateDecoderConfig(); rocDecStatus CreateSurfaces(); rocDecStatus CreateContext(); rocDecStatus DestroyDataBuffers(); - void GetVisibleDevices(std::vector& visible_devices); - void GetCurrentComputePartition(std::vector &currnet_compute_partitions); - void GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector& visible_devices, - std::vector ¤t_compute_partitions, int &offset); +}; + +// Jefftest +// The GpuVaContext singleton class providing access to the the GPU VA services +class GpuVaContext { +public: + int num_devices_; + int device_id_; + int drm_fd_; + VADisplay va_display_; + hipDeviceProp_t hip_dev_prop_; + uint32_t num_dec_engines_; + int num_va_profiles_; + std::vector va_profile_list_; // supported profiles by the current GPU + VAProfile va_profile_; // current profile used + VAConfigID va_config_id_; + uint32_t rt_format_attrib_; + uint32_t output_format_mask_; + uint32_t max_width_; + uint32_t max_height_; + uint32_t min_width_; + uint32_t min_height_; + + static GpuVaContext& GetInstance() { + printf("Get instance .....\n"); // Jefftest + static GpuVaContext instance; + return instance; + } + + rocDecStatus Initialize(int device_id) { + printf("Initialize(): device_id = %d, initialized_ = %d\n", device_id, initialized_); // Jefftest + if ( initialized_ && device_id != device_id_) { + CHECK_VAAPI(vaTerminate(va_display_)); + initialized_ = false; + } + if (!initialized_) { + std::lock_guard lock(mutex); + device_id_ = device_id; + rocDecStatus rocdec_status = ROCDEC_SUCCESS; + rocdec_status = InitHIP(device_id_); + if (rocdec_status != ROCDEC_SUCCESS) { + ERR("Failed to initilize the HIP."); + return rocdec_status; + } + + std::cout << hip_dev_prop_.name << std::endl; // Jefftest + std::cout << 
hip_dev_prop_.gcnArchName << std::endl; // Jefftest + std::string gcn_arch_name = hip_dev_prop_.gcnArchName; + std::size_t pos = gcn_arch_name.find_first_of(":"); + std::string gcn_arch_name_base = (pos != std::string::npos) ? gcn_arch_name.substr(0, pos) : gcn_arch_name; + std::vector visible_devices; + GetVisibleDevices(visible_devices); + std::cout << visible_devices.size() << std::endl; // Jefftest + + int offset = 0; + if (gcn_arch_name_base.compare("gfx942") == 0) { + std::vector current_compute_partitions; + GetCurrentComputePartition(current_compute_partitions); + if (current_compute_partitions.empty()) { + //if the current_compute_partitions is empty then the default SPX mode is assumed. + if (device_id_ < visible_devices.size()) { + offset = visible_devices[device_id_] * 7; + } else { + offset = device_id_ * 7; + } + } else { + GetDrmNodeOffset(hip_dev_prop_.name, device_id_, visible_devices, current_compute_partitions, offset); + } + } + + std::string drm_node = "/dev/dri/renderD"; + if (device_id_ < visible_devices.size()) { + drm_node += std::to_string(128 + offset + visible_devices[device_id_]); + } else { + drm_node += std::to_string(128 + offset + device_id_); + } + std::cout << "drm_node = " << drm_node << std::endl; // Jefftest + + rocdec_status = InitVAAPI(drm_node); + if (rocdec_status != ROCDEC_SUCCESS) { + ERR("Failed to initilize the VAAPI."); + return rocdec_status; + } + + amdgpu_device_handle dev_handle; + uint32_t major_version = 0, minor_version = 0; + if (amdgpu_device_initialize(drm_fd_, &major_version, &minor_version, &dev_handle)) { + ERR("GPU device initialization failed: " + drm_node); + return ROCDEC_DEVICE_INVALID; + } + if (amdgpu_query_hw_ip_count(dev_handle, AMDGPU_HW_IP_VCN_DEC, &num_dec_engines_)) { + ERR("Failed to get the number of video decode engines."); + } + printf("num_dec_engines_ = %d ....\n", num_dec_engines_); // Jefftest + amdgpu_device_deinitialize(dev_handle); + + // Prob VA profiles + num_va_profiles_ = 
vaMaxNumProfiles(va_display_); + std::cout << "num_va_profiles_ = " << num_va_profiles_ << std::endl; // Jefftest + va_profile_list_.resize(num_va_profiles_); + CHECK_VAAPI(vaQueryConfigProfiles(va_display_, va_profile_list_.data(), &num_va_profiles_)); + std::cout << "num_va_profiles_ = " << num_va_profiles_ << std::endl; // Jefftest + + + initialized_ = true; + } + return ROCDEC_SUCCESS; + } + + rocDecStatus CheckDecCapForCodecType(RocdecDecodeCaps *dec_cap) { + if (dec_cap == nullptr) { + ERR("Null decode capability struct pointer."); + return ROCDEC_INVALID_PARAMETER; + } + std::lock_guard lock(mutex); + rocDecStatus rocdec_status = ROCDEC_SUCCESS; + if (!initialized_) { + rocdec_status = Initialize(dec_cap->device_id); + if (rocdec_status != ROCDEC_SUCCESS) { + ERR("Failed to initilize."); + return rocdec_status; + } + } + + dec_cap->is_supported = 1; // init value + VAProfile va_profile = VAProfileNone; + switch (dec_cap->codec_type) { + case rocDecVideoCodec_HEVC: + if (dec_cap->bit_depth_minus_8 == 0) { + va_profile = VAProfileHEVCMain; + } else if (dec_cap->bit_depth_minus_8 == 2) { + va_profile = VAProfileHEVCMain10; + } + break; + case rocDecVideoCodec_AVC: + va_profile = VAProfileH264Main; + break; + case rocDecVideoCodec_VP9: + if (dec_cap->bit_depth_minus_8 == 0) { + va_profile = VAProfileVP9Profile0; + } else if (dec_cap->bit_depth_minus_8 == 2) { + va_profile = VAProfileVP9Profile2; + } + break; + case rocDecVideoCodec_AV1: + #if VA_CHECK_VERSION(1,6,0) + va_profile = VAProfileAV1Profile0; + #else + va_profile = static_cast(32); // VAProfileAV1Profile0; + #endif + break; + default: + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + + int i; + for (i = 0; i < num_va_profiles_; i++) { + if (va_profile_list_[i] == va_profile) { + break; + } + } + if (i == num_va_profiles_) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + + // Check if the config attributes of the profile have been probed before + //if (config_attributes_probed_ == 
false) + if (va_profile != va_profile_ || config_attributes_probed_ == false) { + va_profile_ = va_profile; + + std::cout << "Create VA config .... " << std::endl; // Jefftest + VAConfigAttrib va_config_attrib; + unsigned int attr_count; + std::vector attr_list; + va_config_attrib.type = VAConfigAttribRTFormat; + CHECK_VAAPI(vaGetConfigAttributes(va_display_, va_profile_, VAEntrypointVLD, &va_config_attrib, 1)); + rt_format_attrib_ = va_config_attrib.value; + + CHECK_VAAPI(vaCreateConfig(va_display_, va_profile_, VAEntrypointVLD, &va_config_attrib, 1, &va_config_id_)); + CHECK_VAAPI(vaQuerySurfaceAttributes(va_display_, va_config_id_, 0, &attr_count)); + attr_list.resize(attr_count); + CHECK_VAAPI(vaQuerySurfaceAttributes(va_display_, va_config_id_, attr_list.data(), &attr_count)); + output_format_mask_ = 0; + CHECK_VAAPI(vaDestroyConfig(va_display_, va_config_id_)); + for (int k = 0; k < attr_count; k++) { + switch (attr_list[k].type) { + case VASurfaceAttribPixelFormat: { + switch (attr_list[k].value.value.i) { + case VA_FOURCC_NV12: + output_format_mask_ |= 1 << rocDecVideoSurfaceFormat_NV12; + break; + case VA_FOURCC_P016: + output_format_mask_ |= 1 << rocDecVideoSurfaceFormat_P016; + break; + default: + break; + } + } + break; + case VASurfaceAttribMinWidth: + min_width_ = attr_list[k].value.value.i; + break; + case VASurfaceAttribMinHeight: + min_height_ = attr_list[k].value.value.i; + break; + case VASurfaceAttribMaxWidth: + max_width_ = attr_list[k].value.value.i; + break; + case VASurfaceAttribMaxHeight: + max_height_ = attr_list[k].value.value.i; + break; + default: + break; + } + } + config_attributes_probed_ = true; + } + + // Check chroma format + switch (dec_cap->chroma_format) { + case rocDecVideoChromaFormat_Monochrome: { + if ((rt_format_attrib_ & VA_RT_FORMAT_YUV400) == 0) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + break; + } + case rocDecVideoChromaFormat_420: { + if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV420 | 
VA_RT_FORMAT_YUV420_10 | VA_RT_FORMAT_YUV420_12)) == 0) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + break; + } + case rocDecVideoChromaFormat_422: { + if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV422_10 | VA_RT_FORMAT_YUV422_12)) == 0) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + break; + } + case rocDecVideoChromaFormat_444: { + if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV444 | VA_RT_FORMAT_YUV444_10 | VA_RT_FORMAT_YUV444_12)) == 0) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + break; + } + default: { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + } + // Check bit depth + switch (dec_cap->bit_depth_minus_8) { + case 0: { + if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV420 | VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV444 | VA_RT_FORMAT_YUV400)) == 0) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + break; + } + case 2: { + if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV420_10 | VA_RT_FORMAT_YUV422_10 | VA_RT_FORMAT_YUV444_10)) == 0) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + break; + } + case 4: { + if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV420_12 | VA_RT_FORMAT_YUV422_12 | VA_RT_FORMAT_YUV444_12)) == 0) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + break; + } + default: { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + } + + dec_cap->num_decoders = num_dec_engines_; + dec_cap->output_format_mask = output_format_mask_; + dec_cap->max_width = max_width_; + dec_cap->max_height = max_height_; + dec_cap->min_width = min_width_; + dec_cap->min_height = min_height_; + // Jefftest + std::cout << "devicde_id = " << (int)dec_cap->device_id << ", codec_type = " << dec_cap->codec_type << ", chroma_format = " << dec_cap->chroma_format << ", bit_depth_minus_8 = " << dec_cap->bit_depth_minus_8 << ", is_supported = " << (int)dec_cap->is_supported << ", num_decoders = " << (int)dec_cap->num_decoders << ", output_format_mask = " << 
dec_cap->output_format_mask << ", max_width = " << dec_cap->max_width << ", max_height = " << dec_cap->max_height << ", min_width = " << dec_cap->min_width << ", min_height = " << dec_cap->min_height << std::endl; + + return ROCDEC_SUCCESS; + } +private: + bool initialized_; + std::mutex mutex; + bool config_attributes_probed_; + + GpuVaContext() : initialized_{false}, drm_fd_{-1}, num_dec_engines_{1}, va_profile_{VAProfileNone}, config_attributes_probed_{false} { + printf("Private construction .... \n"); // Jefftest + } + GpuVaContext(const GpuVaContext&) = delete; + GpuVaContext& operator = (const GpuVaContext) = delete; + ~GpuVaContext() = default; + + rocDecStatus InitHIP(int device_id) { + CHECK_HIP(hipGetDeviceCount(&num_devices_)); + if (num_devices_ < 1) { + ERR("Didn't find any GPU."); + return ROCDEC_DEVICE_INVALID; + } + if (device_id >= num_devices_) { + ERR("ERROR: the requested device_id is not found! "); + return ROCDEC_DEVICE_INVALID; + } + CHECK_HIP(hipSetDevice(device_id)); + CHECK_HIP(hipGetDeviceProperties(&hip_dev_prop_, device_id)); + return ROCDEC_SUCCESS; + } + + rocDecStatus InitVAAPI(std::string drm_node) { + std::cout << "InitVAAPI() new .........." << std::endl; // Jefftest + drm_fd_ = open(drm_node.c_str(), O_RDWR); + if (drm_fd_ < 0) { + ERR("Failed to open drm node." 
+ drm_node); + return ROCDEC_NOT_INITIALIZED; + } + va_display_ = vaGetDisplayDRM(drm_fd_); + if (!va_display_) { + ERR("Failed to create va_display_."); + return ROCDEC_NOT_INITIALIZED; + } + vaSetInfoCallback(va_display_, NULL, NULL); + int major_version = 0, minor_version = 0; + CHECK_VAAPI(vaInitialize(va_display_, &major_version, &minor_version)); + return ROCDEC_SUCCESS; + } + + void GetVisibleDevices(std::vector& visible_devices_vetor) { + char *visible_devices = std::getenv("HIP_VISIBLE_DEVICES"); + if (visible_devices != nullptr) { + char *token = std::strtok(visible_devices,","); + while (token != nullptr) { + visible_devices_vetor.push_back(std::atoi(token)); + token = std::strtok(nullptr,","); + } + std::sort(visible_devices_vetor.begin(), visible_devices_vetor.end()); + } + } + + void GetCurrentComputePartition(std::vector ¤t_compute_partitions) { + std::string search_path = "/sys/devices/"; + std::string partition_file = "current_compute_partition"; + std::error_code ec; + if (fs::exists(search_path)) { + for (auto it = fs::recursive_directory_iterator(search_path, fs::directory_options::skip_permission_denied); it != fs::recursive_directory_iterator(); ) { + try { + if (it->path().filename() == partition_file) { + std::ifstream file(it->path()); + if (file.is_open()) { + std::string partition; + std::getline(file, partition); + if (partition.compare("SPX") == 0 || partition.compare("spx") == 0) { + current_compute_partitions.push_back(kSpx); + } else if (partition.compare("DPX") == 0 || partition.compare("dpx") == 0) { + current_compute_partitions.push_back(kDpx); + } else if (partition.compare("TPX") == 0 || partition.compare("tpx") == 0) { + current_compute_partitions.push_back(kTpx); + } else if (partition.compare("QPX") == 0 || partition.compare("qpx") == 0) { + current_compute_partitions.push_back(kQpx); + } else if (partition.compare("CPX") == 0 || partition.compare("cpx") == 0) { + current_compute_partitions.push_back(kCpx); + } + 
file.close(); + } + } + ++it; + } catch (fs::filesystem_error& e) { + it.increment(ec); + } + } + } + } + + void GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector& visible_devices, std::vector ¤t_compute_partitions, int &offset) { + if (!current_compute_partitions.empty()) { + switch (current_compute_partitions[0]) { + case kSpx: + if (device_id < visible_devices.size()) { + offset = visible_devices[device_id] * 7; + } else { + offset = device_id * 7; + } + break; + case kDpx: + if (device_id < visible_devices.size()) { + offset = (visible_devices[device_id] / 2) * 6; + } else { + offset = (device_id / 2) * 6; + } + break; + case kTpx: + // Please note that although there are only 6 XCCs per socket on MI300A, + // there are two dummy render nodes added by the driver. + // This needs to be taken into account when creating drm_node on each socket in TPX mode. + if (device_id < visible_devices.size()) { + offset = (visible_devices[device_id] / 3) * 5; + } else { + offset = (device_id / 3) * 5; + } + break; + case kQpx: + if (device_id < visible_devices.size()) { + offset = (visible_devices[device_id] / 4) * 4; + } else { + offset = (device_id / 4) * 4; + } + break; + case kCpx: + // Please note that both MI300A and MI300X have the same gfx_arch_name which is + // gfx942. Therefore we cannot use the gfx942 to identify MI300A. + // instead use the device name and look for MI300A + // Also, as explained aboe in the TPX mode section, we need to be taken into account + // the extra two dummy nodes when creating drm_node on each socket in CPX mode as well. 
+ std::string mi300a = "MI300A"; + size_t found_mi300a = device_name.find(mi300a); + if (found_mi300a != std::string::npos) { + if (device_id < visible_devices.size()) { + offset = (visible_devices[device_id] / 6) * 2; + } else { + offset = (device_id / 6) * 2; + } + } + break; + } + } + } }; \ No newline at end of file diff --git a/utils/rocvideodecode/roc_video_dec.cpp b/utils/rocvideodecode/roc_video_dec.cpp index 9fd45b79..032359d7 100644 --- a/utils/rocvideodecode/roc_video_dec.cpp +++ b/utils/rocvideodecode/roc_video_dec.cpp @@ -26,7 +26,7 @@ RocVideoDecoder::RocVideoDecoder(int device_id, OutputSurfaceMemoryType out_mem_ const Rect *p_crop_rect, bool extract_user_sei_Message, uint32_t disp_delay, int max_width, int max_height, uint32_t clk_rate) : device_id_{device_id}, out_mem_type_(out_mem_type), codec_id_(codec), b_force_zero_latency_(force_zero_latency), b_extract_sei_message_(extract_user_sei_Message), disp_delay_(disp_delay), max_width_ (max_width), max_height_(max_height) { - + // Jefftest: check if we and avoid calling init hip twice if (!InitHIP(device_id_)) { THROW("Failed to initilize the HIP"); } @@ -1059,10 +1059,11 @@ bool RocVideoDecoder::CodecSupported(int device_id, rocDecVideoCodec codec_id, u decode_caps.codec_type = codec_id; decode_caps.chroma_format = rocDecVideoChromaFormat_420; decode_caps.bit_depth_minus_8 = bit_depth - 8; - if(rocDecGetDecoderCaps(&decode_caps) != ROCDEC_SUCCESS) { + if((rocDecGetDecoderCaps(&decode_caps) != ROCDEC_SUCCESS) || (decode_caps.is_supported == false)) { return false; + } else { + return true; } - return true; } void RocVideoDecoder::WaitForDecodeCompletion() { From 274109542ee01981783fd46aabadb80d0b53ba9a Mon Sep 17 00:00:00 2001 From: Jeff Jiang Date: Mon, 16 Dec 2024 18:00:05 -0500 Subject: [PATCH 04/10] * rocDecode/HW caps: Removed GpuVaContext class out of vaapi_videodecoder.h and into a new file. Removed debug logs. Removed roc_decoder_caps.h. 
--- src/rocdecode/roc_decoder_caps.h | 74 --- src/rocdecode/rocdecode_va_context.h | 495 +++++++++++++++++++++ src/rocdecode/vaapi/vaapi_videodecoder.cpp | 1 - src/rocdecode/vaapi/vaapi_videodecoder.h | 466 +------------------ utils/rocvideodecode/roc_video_dec.cpp | 1 - 5 files changed, 496 insertions(+), 541 deletions(-) delete mode 100644 src/rocdecode/roc_decoder_caps.h create mode 100644 src/rocdecode/rocdecode_va_context.h diff --git a/src/rocdecode/roc_decoder_caps.h b/src/rocdecode/roc_decoder_caps.h deleted file mode 100644 index 49181c5d..00000000 --- a/src/rocdecode/roc_decoder_caps.h +++ /dev/null @@ -1,74 +0,0 @@ -/* -Copyright (c) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. 
-*/ -#pragma once - -#include -#include -#include -#include -#include -#include "../commons.h" -#include "../../api/rocdecode.h" -#include "vaapi_videodecoder.h" - -// The RocDecVcnCodecSpec singleton class for providing access to the the vcn_spec_table -class RocDecVcnCodecSpec { -public: - static RocDecVcnCodecSpec& GetInstance() { - static RocDecVcnCodecSpec instance; - return instance; - } - rocDecStatus GetDecoderCaps(RocdecDecodeCaps *pdc) { - // Jefftest - GpuVaContext& va_ctx = GpuVaContext::GetInstance(); - va_ctx.Initialize(pdc->device_id); - if (va_ctx.CheckDecCapForCodecType(pdc) != ROCDEC_SUCCESS) { - ERR("Failed to obtain decoder capabilities from driver."); - return ROCDEC_DEVICE_INVALID; - } else { - return ROCDEC_SUCCESS; - } - } - bool IsCodecConfigSupported(int device_id, rocDecVideoCodec codec_type, rocDecVideoChromaFormat chroma_format, uint32_t bit_depth_minus8, rocDecVideoSurfaceFormat output_format) { - RocdecDecodeCaps decode_caps; - decode_caps.device_id = device_id; - decode_caps.codec_type = codec_type; - decode_caps.chroma_format = chroma_format; - decode_caps.bit_depth_minus_8 = bit_depth_minus8; - if((rocDecGetDecoderCaps(&decode_caps) != ROCDEC_SUCCESS) || (decode_caps.is_supported == false) || ((decode_caps.output_format_mask & (1 << output_format)) == 0)) { - return false; - } else { - return true; - } - } -private: - bool initialized_; - uint32_t num_dec_engines_ = 1; - // Jefftest std::vector decode_cap_list_{0}; - std::mutex mutex; - RocDecVcnCodecSpec() { - initialized_ = false; - } - RocDecVcnCodecSpec(const RocDecVcnCodecSpec&) = delete; - RocDecVcnCodecSpec& operator = (const RocDecVcnCodecSpec) = delete; - ~RocDecVcnCodecSpec() = default; -}; \ No newline at end of file diff --git a/src/rocdecode/rocdecode_va_context.h b/src/rocdecode/rocdecode_va_context.h new file mode 100644 index 00000000..f00fed16 --- /dev/null +++ b/src/rocdecode/rocdecode_va_context.h @@ -0,0 +1,495 @@ +/* +Copyright (c) 2023 - 2024 Advanced Micro 
Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if __cplusplus >= 201703L && __has_include() + #include + namespace fs = std::filesystem; +#else + #include + namespace fs = std::experimental::filesystem; +#endif +#include +#include +#include +#include +#include +#include "../../commons.h" +#include "../../../api/rocdecode.h" + +#define CHECK_HIP(call) {\ + hipError_t hip_status = call;\ + if (hip_status != hipSuccess) {\ + std::cout << "HIP failure: " << #call << " failed with 'status: " << hipGetErrorName(hip_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\ + return ROCDEC_RUNTIME_ERROR;\ + }\ +} + +#define CHECK_VAAPI(call) {\ + VAStatus va_status = call;\ + if (va_status != VA_STATUS_SUCCESS) {\ + std::cout << "VAAPI failure: " << #call << " failed with status: " << std::hex << "0x" << va_status << std::dec << " = '" << vaErrorStr(va_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\ + return ROCDEC_RUNTIME_ERROR;\ + }\ +} + +#define INIT_SLICE_PARAM_LIST_NUM 16 // initial slice parameter buffer list size + +typedef enum { + kSpx = 0, // Single Partition Accelerator + kDpx = 1, // Dual Partition Accelerator + kTpx = 2, // Triple Partition Accelerator + kQpx = 3, // Quad Partition Accelerator + kCpx = 4, // Core Partition Accelerator +} ComputePartition; + +// The GpuVaContext singleton class providing access to the the GPU VA services +class GpuVaContext { +public: + int num_devices_; + int device_id_; + int drm_fd_; + VADisplay va_display_; + hipDeviceProp_t hip_dev_prop_; + uint32_t num_dec_engines_; + int num_va_profiles_; + std::vector va_profile_list_; // supported profiles by the current GPU + VAProfile va_profile_; // current profile used + VAConfigID va_config_id_; + uint32_t rt_format_attrib_; + uint32_t output_format_mask_; + uint32_t max_width_; + uint32_t max_height_; + uint32_t min_width_; + uint32_t min_height_; + + static GpuVaContext& GetInstance() { + 
static GpuVaContext instance; + return instance; + } + + rocDecStatus Initialize(int device_id) { + if ( initialized_ && device_id != device_id_) { + CHECK_VAAPI(vaTerminate(va_display_)); + initialized_ = false; + } + if (!initialized_) { + std::lock_guard lock(mutex); + device_id_ = device_id; + rocDecStatus rocdec_status = ROCDEC_SUCCESS; + rocdec_status = InitHIP(device_id_); + if (rocdec_status != ROCDEC_SUCCESS) { + ERR("Failed to initilize the HIP."); + return rocdec_status; + } + + std::string gcn_arch_name = hip_dev_prop_.gcnArchName; + std::size_t pos = gcn_arch_name.find_first_of(":"); + std::string gcn_arch_name_base = (pos != std::string::npos) ? gcn_arch_name.substr(0, pos) : gcn_arch_name; + std::vector visible_devices; + GetVisibleDevices(visible_devices); + + int offset = 0; + if (gcn_arch_name_base.compare("gfx942") == 0) { + std::vector current_compute_partitions; + GetCurrentComputePartition(current_compute_partitions); + if (current_compute_partitions.empty()) { + //if the current_compute_partitions is empty then the default SPX mode is assumed. 
+ if (device_id_ < visible_devices.size()) { + offset = visible_devices[device_id_] * 7; + } else { + offset = device_id_ * 7; + } + } else { + GetDrmNodeOffset(hip_dev_prop_.name, device_id_, visible_devices, current_compute_partitions, offset); + } + } + + std::string drm_node = "/dev/dri/renderD"; + if (device_id_ < visible_devices.size()) { + drm_node += std::to_string(128 + offset + visible_devices[device_id_]); + } else { + drm_node += std::to_string(128 + offset + device_id_); + } + + rocdec_status = InitVAAPI(drm_node); + if (rocdec_status != ROCDEC_SUCCESS) { + ERR("Failed to initilize the VAAPI."); + return rocdec_status; + } + + amdgpu_device_handle dev_handle; + uint32_t major_version = 0, minor_version = 0; + if (amdgpu_device_initialize(drm_fd_, &major_version, &minor_version, &dev_handle)) { + ERR("GPU device initialization failed: " + drm_node); + return ROCDEC_DEVICE_INVALID; + } + if (amdgpu_query_hw_ip_count(dev_handle, AMDGPU_HW_IP_VCN_DEC, &num_dec_engines_)) { + ERR("Failed to get the number of video decode engines."); + } + amdgpu_device_deinitialize(dev_handle); + + // Prob VA profiles + num_va_profiles_ = vaMaxNumProfiles(va_display_); + va_profile_list_.resize(num_va_profiles_); + CHECK_VAAPI(vaQueryConfigProfiles(va_display_, va_profile_list_.data(), &num_va_profiles_)); + + initialized_ = true; + } + return ROCDEC_SUCCESS; + } + + rocDecStatus CheckDecCapForCodecType(RocdecDecodeCaps *dec_cap) { + if (dec_cap == nullptr) { + ERR("Null decode capability struct pointer."); + return ROCDEC_INVALID_PARAMETER; + } + std::lock_guard lock(mutex); + rocDecStatus rocdec_status = ROCDEC_SUCCESS; + if (!initialized_) { + rocdec_status = Initialize(dec_cap->device_id); + if (rocdec_status != ROCDEC_SUCCESS) { + ERR("Failed to initilize."); + return rocdec_status; + } + } + + dec_cap->is_supported = 1; // init value + VAProfile va_profile = VAProfileNone; + switch (dec_cap->codec_type) { + case rocDecVideoCodec_HEVC: + if (dec_cap->bit_depth_minus_8 
== 0) { + va_profile = VAProfileHEVCMain; + } else if (dec_cap->bit_depth_minus_8 == 2) { + va_profile = VAProfileHEVCMain10; + } + break; + case rocDecVideoCodec_AVC: + va_profile = VAProfileH264Main; + break; + case rocDecVideoCodec_VP9: + if (dec_cap->bit_depth_minus_8 == 0) { + va_profile = VAProfileVP9Profile0; + } else if (dec_cap->bit_depth_minus_8 == 2) { + va_profile = VAProfileVP9Profile2; + } + break; + case rocDecVideoCodec_AV1: + #if VA_CHECK_VERSION(1,6,0) + va_profile = VAProfileAV1Profile0; + #else + va_profile = static_cast(32); // VAProfileAV1Profile0; + #endif + break; + default: + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + + int i; + for (i = 0; i < num_va_profiles_; i++) { + if (va_profile_list_[i] == va_profile) { + break; + } + } + if (i == num_va_profiles_) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + + // Check if the config attributes of the profile have been probed before + //if (config_attributes_probed_ == false) + if (va_profile != va_profile_ || config_attributes_probed_ == false) { + va_profile_ = va_profile; + + VAConfigAttrib va_config_attrib; + unsigned int attr_count; + std::vector attr_list; + va_config_attrib.type = VAConfigAttribRTFormat; + CHECK_VAAPI(vaGetConfigAttributes(va_display_, va_profile_, VAEntrypointVLD, &va_config_attrib, 1)); + rt_format_attrib_ = va_config_attrib.value; + + CHECK_VAAPI(vaCreateConfig(va_display_, va_profile_, VAEntrypointVLD, &va_config_attrib, 1, &va_config_id_)); + CHECK_VAAPI(vaQuerySurfaceAttributes(va_display_, va_config_id_, 0, &attr_count)); + attr_list.resize(attr_count); + CHECK_VAAPI(vaQuerySurfaceAttributes(va_display_, va_config_id_, attr_list.data(), &attr_count)); + output_format_mask_ = 0; + CHECK_VAAPI(vaDestroyConfig(va_display_, va_config_id_)); + for (int k = 0; k < attr_count; k++) { + switch (attr_list[k].type) { + case VASurfaceAttribPixelFormat: { + switch (attr_list[k].value.value.i) { + case VA_FOURCC_NV12: + output_format_mask_ |= 1 << 
rocDecVideoSurfaceFormat_NV12; + break; + case VA_FOURCC_P016: + output_format_mask_ |= 1 << rocDecVideoSurfaceFormat_P016; + break; + default: + break; + } + } + break; + case VASurfaceAttribMinWidth: + min_width_ = attr_list[k].value.value.i; + break; + case VASurfaceAttribMinHeight: + min_height_ = attr_list[k].value.value.i; + break; + case VASurfaceAttribMaxWidth: + max_width_ = attr_list[k].value.value.i; + break; + case VASurfaceAttribMaxHeight: + max_height_ = attr_list[k].value.value.i; + break; + default: + break; + } + } + config_attributes_probed_ = true; + } + + // Check chroma format + switch (dec_cap->chroma_format) { + case rocDecVideoChromaFormat_Monochrome: { + if ((rt_format_attrib_ & VA_RT_FORMAT_YUV400) == 0) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + break; + } + case rocDecVideoChromaFormat_420: { + if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV420 | VA_RT_FORMAT_YUV420_10 | VA_RT_FORMAT_YUV420_12)) == 0) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + break; + } + case rocDecVideoChromaFormat_422: { + if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV422_10 | VA_RT_FORMAT_YUV422_12)) == 0) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + break; + } + case rocDecVideoChromaFormat_444: { + if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV444 | VA_RT_FORMAT_YUV444_10 | VA_RT_FORMAT_YUV444_12)) == 0) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + break; + } + default: { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + } + // Check bit depth + switch (dec_cap->bit_depth_minus_8) { + case 0: { + if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV420 | VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV444 | VA_RT_FORMAT_YUV400)) == 0) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + break; + } + case 2: { + if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV420_10 | VA_RT_FORMAT_YUV422_10 | VA_RT_FORMAT_YUV444_10)) == 0) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + break; + 
} + case 4: { + if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV420_12 | VA_RT_FORMAT_YUV422_12 | VA_RT_FORMAT_YUV444_12)) == 0) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + break; + } + default: { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + } + + dec_cap->num_decoders = num_dec_engines_; + dec_cap->output_format_mask = output_format_mask_; + dec_cap->max_width = max_width_; + dec_cap->max_height = max_height_; + dec_cap->min_width = min_width_; + dec_cap->min_height = min_height_; + return ROCDEC_SUCCESS; + } + +private: + bool initialized_; + std::mutex mutex; + bool config_attributes_probed_; + + GpuVaContext() : initialized_{false}, drm_fd_{-1}, num_dec_engines_{1}, va_profile_{VAProfileNone}, config_attributes_probed_{false} {}; + GpuVaContext(const GpuVaContext&) = delete; + GpuVaContext& operator = (const GpuVaContext) = delete; + ~GpuVaContext() = default; + + rocDecStatus InitHIP(int device_id) { + CHECK_HIP(hipGetDeviceCount(&num_devices_)); + if (num_devices_ < 1) { + ERR("Didn't find any GPU."); + return ROCDEC_DEVICE_INVALID; + } + if (device_id >= num_devices_) { + ERR("ERROR: the requested device_id is not found! "); + return ROCDEC_DEVICE_INVALID; + } + CHECK_HIP(hipSetDevice(device_id)); + CHECK_HIP(hipGetDeviceProperties(&hip_dev_prop_, device_id)); + return ROCDEC_SUCCESS; + } + + rocDecStatus InitVAAPI(std::string drm_node) { + drm_fd_ = open(drm_node.c_str(), O_RDWR); + if (drm_fd_ < 0) { + ERR("Failed to open drm node." 
+ drm_node); + return ROCDEC_NOT_INITIALIZED; + } + va_display_ = vaGetDisplayDRM(drm_fd_); + if (!va_display_) { + ERR("Failed to create va_display_."); + return ROCDEC_NOT_INITIALIZED; + } + vaSetInfoCallback(va_display_, NULL, NULL); + int major_version = 0, minor_version = 0; + CHECK_VAAPI(vaInitialize(va_display_, &major_version, &minor_version)); + return ROCDEC_SUCCESS; + } + + void GetVisibleDevices(std::vector& visible_devices_vetor) { + char *visible_devices = std::getenv("HIP_VISIBLE_DEVICES"); + if (visible_devices != nullptr) { + char *token = std::strtok(visible_devices,","); + while (token != nullptr) { + visible_devices_vetor.push_back(std::atoi(token)); + token = std::strtok(nullptr,","); + } + std::sort(visible_devices_vetor.begin(), visible_devices_vetor.end()); + } + } + + void GetCurrentComputePartition(std::vector ¤t_compute_partitions) { + std::string search_path = "/sys/devices/"; + std::string partition_file = "current_compute_partition"; + std::error_code ec; + if (fs::exists(search_path)) { + for (auto it = fs::recursive_directory_iterator(search_path, fs::directory_options::skip_permission_denied); it != fs::recursive_directory_iterator(); ) { + try { + if (it->path().filename() == partition_file) { + std::ifstream file(it->path()); + if (file.is_open()) { + std::string partition; + std::getline(file, partition); + if (partition.compare("SPX") == 0 || partition.compare("spx") == 0) { + current_compute_partitions.push_back(kSpx); + } else if (partition.compare("DPX") == 0 || partition.compare("dpx") == 0) { + current_compute_partitions.push_back(kDpx); + } else if (partition.compare("TPX") == 0 || partition.compare("tpx") == 0) { + current_compute_partitions.push_back(kTpx); + } else if (partition.compare("QPX") == 0 || partition.compare("qpx") == 0) { + current_compute_partitions.push_back(kQpx); + } else if (partition.compare("CPX") == 0 || partition.compare("cpx") == 0) { + current_compute_partitions.push_back(kCpx); + } + 
file.close(); + } + } + ++it; + } catch (fs::filesystem_error& e) { + it.increment(ec); + } + } + } + } + + void GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector& visible_devices, std::vector ¤t_compute_partitions, int &offset) { + if (!current_compute_partitions.empty()) { + switch (current_compute_partitions[0]) { + case kSpx: + if (device_id < visible_devices.size()) { + offset = visible_devices[device_id] * 7; + } else { + offset = device_id * 7; + } + break; + case kDpx: + if (device_id < visible_devices.size()) { + offset = (visible_devices[device_id] / 2) * 6; + } else { + offset = (device_id / 2) * 6; + } + break; + case kTpx: + // Please note that although there are only 6 XCCs per socket on MI300A, + // there are two dummy render nodes added by the driver. + // This needs to be taken into account when creating drm_node on each socket in TPX mode. + if (device_id < visible_devices.size()) { + offset = (visible_devices[device_id] / 3) * 5; + } else { + offset = (device_id / 3) * 5; + } + break; + case kQpx: + if (device_id < visible_devices.size()) { + offset = (visible_devices[device_id] / 4) * 4; + } else { + offset = (device_id / 4) * 4; + } + break; + case kCpx: + // Please note that both MI300A and MI300X have the same gfx_arch_name which is + // gfx942. Therefore we cannot use the gfx942 to identify MI300A. + // instead use the device name and look for MI300A + // Also, as explained aboe in the TPX mode section, we need to be taken into account + // the extra two dummy nodes when creating drm_node on each socket in CPX mode as well. 
+ std::string mi300a = "MI300A"; + size_t found_mi300a = device_name.find(mi300a); + if (found_mi300a != std::string::npos) { + if (device_id < visible_devices.size()) { + offset = (visible_devices[device_id] / 6) * 2; + } else { + offset = (device_id / 6) * 2; + } + } + break; + } + } + } +}; \ No newline at end of file diff --git a/src/rocdecode/vaapi/vaapi_videodecoder.cpp b/src/rocdecode/vaapi/vaapi_videodecoder.cpp index 6cca899c..29aa8e03 100644 --- a/src/rocdecode/vaapi/vaapi_videodecoder.cpp +++ b/src/rocdecode/vaapi/vaapi_videodecoder.cpp @@ -82,7 +82,6 @@ rocDecStatus VaapiVideoDecoder::InitializeDecoder(std::string device_name, std:: return ROCDEC_NOT_SUPPORTED; } - // Jefftest GpuVaContext& va_ctx = GpuVaContext::GetInstance(); va_ctx.Initialize(decoder_create_info_.device_id); va_display_ = va_ctx.va_display_; diff --git a/src/rocdecode/vaapi/vaapi_videodecoder.h b/src/rocdecode/vaapi/vaapi_videodecoder.h index f6585fc8..96196f57 100644 --- a/src/rocdecode/vaapi/vaapi_videodecoder.h +++ b/src/rocdecode/vaapi/vaapi_videodecoder.h @@ -45,32 +45,7 @@ THE SOFTWARE. 
#include #include "../../commons.h" #include "../../../api/rocdecode.h" - -#define CHECK_HIP(call) {\ - hipError_t hip_status = call;\ - if (hip_status != hipSuccess) {\ - std::cout << "HIP failure: " << #call << " failed with 'status: " << hipGetErrorName(hip_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\ - return ROCDEC_RUNTIME_ERROR;\ - }\ -} - -#define CHECK_VAAPI(call) {\ - VAStatus va_status = call;\ - if (va_status != VA_STATUS_SUCCESS) {\ - std::cout << "VAAPI failure: " << #call << " failed with status: " << std::hex << "0x" << va_status << std::dec << " = '" << vaErrorStr(va_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\ - return ROCDEC_RUNTIME_ERROR;\ - }\ -} - -#define INIT_SLICE_PARAM_LIST_NUM 16 // initial slice parameter buffer list size - -typedef enum { - kSpx = 0, // Single Partition Accelerator - kDpx = 1, // Dual Partition Accelerator - kTpx = 2, // Triple Partition Accelerator - kQpx = 3, // Quad Partition Accelerator - kCpx = 4, // Core Partition Accelerator -} ComputePartition; +#include "rocdecode_va_context.h" class VaapiVideoDecoder { public: @@ -106,443 +81,4 @@ class VaapiVideoDecoder { rocDecStatus CreateSurfaces(); rocDecStatus CreateContext(); rocDecStatus DestroyDataBuffers(); -}; - -// Jefftest -// The GpuVaContext singleton class providing access to the the GPU VA services -class GpuVaContext { -public: - int num_devices_; - int device_id_; - int drm_fd_; - VADisplay va_display_; - hipDeviceProp_t hip_dev_prop_; - uint32_t num_dec_engines_; - int num_va_profiles_; - std::vector va_profile_list_; // supported profiles by the current GPU - VAProfile va_profile_; // current profile used - VAConfigID va_config_id_; - uint32_t rt_format_attrib_; - uint32_t output_format_mask_; - uint32_t max_width_; - uint32_t max_height_; - uint32_t min_width_; - uint32_t min_height_; - - static GpuVaContext& GetInstance() { - printf("Get instance .....\n"); // Jefftest - static GpuVaContext instance; - return 
instance; - } - - rocDecStatus Initialize(int device_id) { - printf("Initialize(): device_id = %d, initialized_ = %d\n", device_id, initialized_); // Jefftest - if ( initialized_ && device_id != device_id_) { - CHECK_VAAPI(vaTerminate(va_display_)); - initialized_ = false; - } - if (!initialized_) { - std::lock_guard lock(mutex); - device_id_ = device_id; - rocDecStatus rocdec_status = ROCDEC_SUCCESS; - rocdec_status = InitHIP(device_id_); - if (rocdec_status != ROCDEC_SUCCESS) { - ERR("Failed to initilize the HIP."); - return rocdec_status; - } - - std::cout << hip_dev_prop_.name << std::endl; // Jefftest - std::cout << hip_dev_prop_.gcnArchName << std::endl; // Jefftest - std::string gcn_arch_name = hip_dev_prop_.gcnArchName; - std::size_t pos = gcn_arch_name.find_first_of(":"); - std::string gcn_arch_name_base = (pos != std::string::npos) ? gcn_arch_name.substr(0, pos) : gcn_arch_name; - std::vector visible_devices; - GetVisibleDevices(visible_devices); - std::cout << visible_devices.size() << std::endl; // Jefftest - - int offset = 0; - if (gcn_arch_name_base.compare("gfx942") == 0) { - std::vector current_compute_partitions; - GetCurrentComputePartition(current_compute_partitions); - if (current_compute_partitions.empty()) { - //if the current_compute_partitions is empty then the default SPX mode is assumed. 
- if (device_id_ < visible_devices.size()) { - offset = visible_devices[device_id_] * 7; - } else { - offset = device_id_ * 7; - } - } else { - GetDrmNodeOffset(hip_dev_prop_.name, device_id_, visible_devices, current_compute_partitions, offset); - } - } - - std::string drm_node = "/dev/dri/renderD"; - if (device_id_ < visible_devices.size()) { - drm_node += std::to_string(128 + offset + visible_devices[device_id_]); - } else { - drm_node += std::to_string(128 + offset + device_id_); - } - std::cout << "drm_node = " << drm_node << std::endl; // Jefftest - - rocdec_status = InitVAAPI(drm_node); - if (rocdec_status != ROCDEC_SUCCESS) { - ERR("Failed to initilize the VAAPI."); - return rocdec_status; - } - - amdgpu_device_handle dev_handle; - uint32_t major_version = 0, minor_version = 0; - if (amdgpu_device_initialize(drm_fd_, &major_version, &minor_version, &dev_handle)) { - ERR("GPU device initialization failed: " + drm_node); - return ROCDEC_DEVICE_INVALID; - } - if (amdgpu_query_hw_ip_count(dev_handle, AMDGPU_HW_IP_VCN_DEC, &num_dec_engines_)) { - ERR("Failed to get the number of video decode engines."); - } - printf("num_dec_engines_ = %d ....\n", num_dec_engines_); // Jefftest - amdgpu_device_deinitialize(dev_handle); - - // Prob VA profiles - num_va_profiles_ = vaMaxNumProfiles(va_display_); - std::cout << "num_va_profiles_ = " << num_va_profiles_ << std::endl; // Jefftest - va_profile_list_.resize(num_va_profiles_); - CHECK_VAAPI(vaQueryConfigProfiles(va_display_, va_profile_list_.data(), &num_va_profiles_)); - std::cout << "num_va_profiles_ = " << num_va_profiles_ << std::endl; // Jefftest - - - initialized_ = true; - } - return ROCDEC_SUCCESS; - } - - rocDecStatus CheckDecCapForCodecType(RocdecDecodeCaps *dec_cap) { - if (dec_cap == nullptr) { - ERR("Null decode capability struct pointer."); - return ROCDEC_INVALID_PARAMETER; - } - std::lock_guard lock(mutex); - rocDecStatus rocdec_status = ROCDEC_SUCCESS; - if (!initialized_) { - rocdec_status = 
Initialize(dec_cap->device_id); - if (rocdec_status != ROCDEC_SUCCESS) { - ERR("Failed to initilize."); - return rocdec_status; - } - } - - dec_cap->is_supported = 1; // init value - VAProfile va_profile = VAProfileNone; - switch (dec_cap->codec_type) { - case rocDecVideoCodec_HEVC: - if (dec_cap->bit_depth_minus_8 == 0) { - va_profile = VAProfileHEVCMain; - } else if (dec_cap->bit_depth_minus_8 == 2) { - va_profile = VAProfileHEVCMain10; - } - break; - case rocDecVideoCodec_AVC: - va_profile = VAProfileH264Main; - break; - case rocDecVideoCodec_VP9: - if (dec_cap->bit_depth_minus_8 == 0) { - va_profile = VAProfileVP9Profile0; - } else if (dec_cap->bit_depth_minus_8 == 2) { - va_profile = VAProfileVP9Profile2; - } - break; - case rocDecVideoCodec_AV1: - #if VA_CHECK_VERSION(1,6,0) - va_profile = VAProfileAV1Profile0; - #else - va_profile = static_cast(32); // VAProfileAV1Profile0; - #endif - break; - default: - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - - int i; - for (i = 0; i < num_va_profiles_; i++) { - if (va_profile_list_[i] == va_profile) { - break; - } - } - if (i == num_va_profiles_) { - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - - // Check if the config attributes of the profile have been probed before - //if (config_attributes_probed_ == false) - if (va_profile != va_profile_ || config_attributes_probed_ == false) { - va_profile_ = va_profile; - - std::cout << "Create VA config .... 
" << std::endl; // Jefftest - VAConfigAttrib va_config_attrib; - unsigned int attr_count; - std::vector attr_list; - va_config_attrib.type = VAConfigAttribRTFormat; - CHECK_VAAPI(vaGetConfigAttributes(va_display_, va_profile_, VAEntrypointVLD, &va_config_attrib, 1)); - rt_format_attrib_ = va_config_attrib.value; - - CHECK_VAAPI(vaCreateConfig(va_display_, va_profile_, VAEntrypointVLD, &va_config_attrib, 1, &va_config_id_)); - CHECK_VAAPI(vaQuerySurfaceAttributes(va_display_, va_config_id_, 0, &attr_count)); - attr_list.resize(attr_count); - CHECK_VAAPI(vaQuerySurfaceAttributes(va_display_, va_config_id_, attr_list.data(), &attr_count)); - output_format_mask_ = 0; - CHECK_VAAPI(vaDestroyConfig(va_display_, va_config_id_)); - for (int k = 0; k < attr_count; k++) { - switch (attr_list[k].type) { - case VASurfaceAttribPixelFormat: { - switch (attr_list[k].value.value.i) { - case VA_FOURCC_NV12: - output_format_mask_ |= 1 << rocDecVideoSurfaceFormat_NV12; - break; - case VA_FOURCC_P016: - output_format_mask_ |= 1 << rocDecVideoSurfaceFormat_P016; - break; - default: - break; - } - } - break; - case VASurfaceAttribMinWidth: - min_width_ = attr_list[k].value.value.i; - break; - case VASurfaceAttribMinHeight: - min_height_ = attr_list[k].value.value.i; - break; - case VASurfaceAttribMaxWidth: - max_width_ = attr_list[k].value.value.i; - break; - case VASurfaceAttribMaxHeight: - max_height_ = attr_list[k].value.value.i; - break; - default: - break; - } - } - config_attributes_probed_ = true; - } - - // Check chroma format - switch (dec_cap->chroma_format) { - case rocDecVideoChromaFormat_Monochrome: { - if ((rt_format_attrib_ & VA_RT_FORMAT_YUV400) == 0) { - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - break; - } - case rocDecVideoChromaFormat_420: { - if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV420 | VA_RT_FORMAT_YUV420_10 | VA_RT_FORMAT_YUV420_12)) == 0) { - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - break; - } - case rocDecVideoChromaFormat_422: 
{ - if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV422_10 | VA_RT_FORMAT_YUV422_12)) == 0) { - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - break; - } - case rocDecVideoChromaFormat_444: { - if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV444 | VA_RT_FORMAT_YUV444_10 | VA_RT_FORMAT_YUV444_12)) == 0) { - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - break; - } - default: { - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - } - // Check bit depth - switch (dec_cap->bit_depth_minus_8) { - case 0: { - if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV420 | VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV444 | VA_RT_FORMAT_YUV400)) == 0) { - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - break; - } - case 2: { - if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV420_10 | VA_RT_FORMAT_YUV422_10 | VA_RT_FORMAT_YUV444_10)) == 0) { - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - break; - } - case 4: { - if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV420_12 | VA_RT_FORMAT_YUV422_12 | VA_RT_FORMAT_YUV444_12)) == 0) { - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - break; - } - default: { - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - } - - dec_cap->num_decoders = num_dec_engines_; - dec_cap->output_format_mask = output_format_mask_; - dec_cap->max_width = max_width_; - dec_cap->max_height = max_height_; - dec_cap->min_width = min_width_; - dec_cap->min_height = min_height_; - // Jefftest - std::cout << "devicde_id = " << (int)dec_cap->device_id << ", codec_type = " << dec_cap->codec_type << ", chroma_format = " << dec_cap->chroma_format << ", bit_depth_minus_8 = " << dec_cap->bit_depth_minus_8 << ", is_supported = " << (int)dec_cap->is_supported << ", num_decoders = " << (int)dec_cap->num_decoders << ", output_format_mask = " << dec_cap->output_format_mask << ", max_width = " << dec_cap->max_width << ", max_height = " << dec_cap->max_height << ", min_width = " << dec_cap->min_width << ", min_height = " << 
dec_cap->min_height << std::endl; - - return ROCDEC_SUCCESS; - } -private: - bool initialized_; - std::mutex mutex; - bool config_attributes_probed_; - - GpuVaContext() : initialized_{false}, drm_fd_{-1}, num_dec_engines_{1}, va_profile_{VAProfileNone}, config_attributes_probed_{false} { - printf("Private construction .... \n"); // Jefftest - } - GpuVaContext(const GpuVaContext&) = delete; - GpuVaContext& operator = (const GpuVaContext) = delete; - ~GpuVaContext() = default; - - rocDecStatus InitHIP(int device_id) { - CHECK_HIP(hipGetDeviceCount(&num_devices_)); - if (num_devices_ < 1) { - ERR("Didn't find any GPU."); - return ROCDEC_DEVICE_INVALID; - } - if (device_id >= num_devices_) { - ERR("ERROR: the requested device_id is not found! "); - return ROCDEC_DEVICE_INVALID; - } - CHECK_HIP(hipSetDevice(device_id)); - CHECK_HIP(hipGetDeviceProperties(&hip_dev_prop_, device_id)); - return ROCDEC_SUCCESS; - } - - rocDecStatus InitVAAPI(std::string drm_node) { - std::cout << "InitVAAPI() new .........." << std::endl; // Jefftest - drm_fd_ = open(drm_node.c_str(), O_RDWR); - if (drm_fd_ < 0) { - ERR("Failed to open drm node." 
+ drm_node); - return ROCDEC_NOT_INITIALIZED; - } - va_display_ = vaGetDisplayDRM(drm_fd_); - if (!va_display_) { - ERR("Failed to create va_display_."); - return ROCDEC_NOT_INITIALIZED; - } - vaSetInfoCallback(va_display_, NULL, NULL); - int major_version = 0, minor_version = 0; - CHECK_VAAPI(vaInitialize(va_display_, &major_version, &minor_version)); - return ROCDEC_SUCCESS; - } - - void GetVisibleDevices(std::vector& visible_devices_vetor) { - char *visible_devices = std::getenv("HIP_VISIBLE_DEVICES"); - if (visible_devices != nullptr) { - char *token = std::strtok(visible_devices,","); - while (token != nullptr) { - visible_devices_vetor.push_back(std::atoi(token)); - token = std::strtok(nullptr,","); - } - std::sort(visible_devices_vetor.begin(), visible_devices_vetor.end()); - } - } - - void GetCurrentComputePartition(std::vector ¤t_compute_partitions) { - std::string search_path = "/sys/devices/"; - std::string partition_file = "current_compute_partition"; - std::error_code ec; - if (fs::exists(search_path)) { - for (auto it = fs::recursive_directory_iterator(search_path, fs::directory_options::skip_permission_denied); it != fs::recursive_directory_iterator(); ) { - try { - if (it->path().filename() == partition_file) { - std::ifstream file(it->path()); - if (file.is_open()) { - std::string partition; - std::getline(file, partition); - if (partition.compare("SPX") == 0 || partition.compare("spx") == 0) { - current_compute_partitions.push_back(kSpx); - } else if (partition.compare("DPX") == 0 || partition.compare("dpx") == 0) { - current_compute_partitions.push_back(kDpx); - } else if (partition.compare("TPX") == 0 || partition.compare("tpx") == 0) { - current_compute_partitions.push_back(kTpx); - } else if (partition.compare("QPX") == 0 || partition.compare("qpx") == 0) { - current_compute_partitions.push_back(kQpx); - } else if (partition.compare("CPX") == 0 || partition.compare("cpx") == 0) { - current_compute_partitions.push_back(kCpx); - } - 
file.close(); - } - } - ++it; - } catch (fs::filesystem_error& e) { - it.increment(ec); - } - } - } - } - - void GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector& visible_devices, std::vector ¤t_compute_partitions, int &offset) { - if (!current_compute_partitions.empty()) { - switch (current_compute_partitions[0]) { - case kSpx: - if (device_id < visible_devices.size()) { - offset = visible_devices[device_id] * 7; - } else { - offset = device_id * 7; - } - break; - case kDpx: - if (device_id < visible_devices.size()) { - offset = (visible_devices[device_id] / 2) * 6; - } else { - offset = (device_id / 2) * 6; - } - break; - case kTpx: - // Please note that although there are only 6 XCCs per socket on MI300A, - // there are two dummy render nodes added by the driver. - // This needs to be taken into account when creating drm_node on each socket in TPX mode. - if (device_id < visible_devices.size()) { - offset = (visible_devices[device_id] / 3) * 5; - } else { - offset = (device_id / 3) * 5; - } - break; - case kQpx: - if (device_id < visible_devices.size()) { - offset = (visible_devices[device_id] / 4) * 4; - } else { - offset = (device_id / 4) * 4; - } - break; - case kCpx: - // Please note that both MI300A and MI300X have the same gfx_arch_name which is - // gfx942. Therefore we cannot use the gfx942 to identify MI300A. - // instead use the device name and look for MI300A - // Also, as explained aboe in the TPX mode section, we need to be taken into account - // the extra two dummy nodes when creating drm_node on each socket in CPX mode as well. 
- std::string mi300a = "MI300A"; - size_t found_mi300a = device_name.find(mi300a); - if (found_mi300a != std::string::npos) { - if (device_id < visible_devices.size()) { - offset = (visible_devices[device_id] / 6) * 2; - } else { - offset = (device_id / 6) * 2; - } - } - break; - } - } - } }; \ No newline at end of file diff --git a/utils/rocvideodecode/roc_video_dec.cpp b/utils/rocvideodecode/roc_video_dec.cpp index 032359d7..f0ac85e0 100644 --- a/utils/rocvideodecode/roc_video_dec.cpp +++ b/utils/rocvideodecode/roc_video_dec.cpp @@ -26,7 +26,6 @@ RocVideoDecoder::RocVideoDecoder(int device_id, OutputSurfaceMemoryType out_mem_ const Rect *p_crop_rect, bool extract_user_sei_Message, uint32_t disp_delay, int max_width, int max_height, uint32_t clk_rate) : device_id_{device_id}, out_mem_type_(out_mem_type), codec_id_(codec), b_force_zero_latency_(force_zero_latency), b_extract_sei_message_(extract_user_sei_Message), disp_delay_(disp_delay), max_width_ (max_width), max_height_(max_height) { - // Jefftest: check if we and avoid calling init hip twice if (!InitHIP(device_id_)) { THROW("Failed to initilize the HIP"); } From 32af3f6c59ae3e294f925c2c5e2799e9702c367b Mon Sep 17 00:00:00 2001 From: Jeff Jiang Date: Mon, 16 Dec 2024 21:28:54 -0500 Subject: [PATCH 05/10] * rocDecode/HW caps: Fixed a crash issue with multi-thread cases. Called to vaInitialize() and vaTerminte() should be paired. 
--- src/rocdecode/rocdecode_va_context.h | 10 ++++++++-- src/rocdecode/vaapi/vaapi_videodecoder.cpp | 4 ---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/rocdecode/rocdecode_va_context.h b/src/rocdecode/rocdecode_va_context.h index f00fed16..1cea5348 100644 --- a/src/rocdecode/rocdecode_va_context.h +++ b/src/rocdecode/rocdecode_va_context.h @@ -356,10 +356,16 @@ class GpuVaContext { std::mutex mutex; bool config_attributes_probed_; - GpuVaContext() : initialized_{false}, drm_fd_{-1}, num_dec_engines_{1}, va_profile_{VAProfileNone}, config_attributes_probed_{false} {}; + GpuVaContext() : initialized_{false}, drm_fd_{-1}, va_display_{0}, num_dec_engines_{1}, va_profile_{VAProfileNone}, config_attributes_probed_{false} {}; GpuVaContext(const GpuVaContext&) = delete; GpuVaContext& operator = (const GpuVaContext) = delete; - ~GpuVaContext() = default; + ~GpuVaContext() { + if (va_display_) { + if (vaTerminate(va_display_) != VA_STATUS_SUCCESS) { + ERR("Failed to termiate VA"); + } + } + }; rocDecStatus InitHIP(int device_id) { CHECK_HIP(hipGetDeviceCount(&num_devices_)); diff --git a/src/rocdecode/vaapi/vaapi_videodecoder.cpp b/src/rocdecode/vaapi/vaapi_videodecoder.cpp index 29aa8e03..bb3aa8d8 100644 --- a/src/rocdecode/vaapi/vaapi_videodecoder.cpp +++ b/src/rocdecode/vaapi/vaapi_videodecoder.cpp @@ -52,10 +52,6 @@ VaapiVideoDecoder::~VaapiVideoDecoder() { if (va_status != VA_STATUS_SUCCESS) { ERR("vaDestroyConfig failed"); } - va_status = vaTerminate(va_display_); - if (va_status != VA_STATUS_SUCCESS) { - ERR("vaTerminate failed"); - } } } From 5699aacffd13745dcb1e3e61a67866ef5db0f365 Mon Sep 17 00:00:00 2001 From: Jeff Jiang Date: Fri, 20 Dec 2024 16:01:07 -0500 Subject: [PATCH 06/10] * rocDecode/HW caps: Added multi-GPU support. 
--- src/rocdecode/rocdecode_api.cpp | 11 +- src/rocdecode/rocdecode_va_context.h | 249 ++++++++++++--------- src/rocdecode/vaapi/vaapi_videodecoder.cpp | 11 +- src/rocdecode/vaapi/vaapi_videodecoder.h | 2 + 4 files changed, 161 insertions(+), 112 deletions(-) diff --git a/src/rocdecode/rocdecode_api.cpp b/src/rocdecode/rocdecode_api.cpp index 4af07f8c..b0200b11 100644 --- a/src/rocdecode/rocdecode_api.cpp +++ b/src/rocdecode/rocdecode_api.cpp @@ -73,10 +73,15 @@ rocDecGetDecoderCaps(RocdecDecodeCaps *pdc) { return ROCDEC_INVALID_PARAMETER; } GpuVaContext& va_ctx = GpuVaContext::GetInstance(); - va_ctx.Initialize(pdc->device_id); - if (va_ctx.CheckDecCapForCodecType(pdc) != ROCDEC_SUCCESS) { + rocDecStatus ret = ROCDEC_SUCCESS; + uint32_t va_ctx_id; + if ((ret = va_ctx.GetVaContext(pdc->device_id, &va_ctx_id)) != ROCDEC_SUCCESS) { + ERR("Failed to get VA context."); + return ret; + } + if ((ret = va_ctx.CheckDecCapForCodecType(pdc)) != ROCDEC_SUCCESS) { ERR("Failed to obtain decoder capabilities from driver."); - return ROCDEC_DEVICE_INVALID; + return ret; } else { return ROCDEC_SUCCESS; } diff --git a/src/rocdecode/rocdecode_va_context.h b/src/rocdecode/rocdecode_va_context.h index 1cea5348..8d35711e 100644 --- a/src/rocdecode/rocdecode_va_context.h +++ b/src/rocdecode/rocdecode_va_context.h @@ -62,8 +62,6 @@ THE SOFTWARE. 
}\ } -#define INIT_SLICE_PARAM_LIST_NUM 16 // initial slice parameter buffer list size - typedef enum { kSpx = 0, // Single Partition Accelerator kDpx = 1, // Dual Partition Accelerator @@ -72,47 +70,70 @@ typedef enum { kCpx = 4, // Core Partition Accelerator } ComputePartition; +typedef struct { + int num_devices; + int device_id; + int drm_fd; + VADisplay va_display; + hipDeviceProp_t hip_dev_prop; + uint32_t num_dec_engines; + int num_va_profiles; + std::vector va_profile_list; // supported profiles by the current GPU + VAProfile va_profile; // current profile used + VAConfigID va_config_id; + bool config_attributes_probed; + uint32_t rt_format_attrib; + uint32_t output_format_mask; + uint32_t max_width; + uint32_t max_height; + uint32_t min_width; + uint32_t min_height; +} VaContextInfo; + // The GpuVaContext singleton class providing access to the the GPU VA services class GpuVaContext { public: - int num_devices_; - int device_id_; - int drm_fd_; - VADisplay va_display_; - hipDeviceProp_t hip_dev_prop_; - uint32_t num_dec_engines_; - int num_va_profiles_; - std::vector va_profile_list_; // supported profiles by the current GPU - VAProfile va_profile_; // current profile used - VAConfigID va_config_id_; - uint32_t rt_format_attrib_; - uint32_t output_format_mask_; - uint32_t max_width_; - uint32_t max_height_; - uint32_t min_width_; - uint32_t min_height_; + std::vector va_contexts_; static GpuVaContext& GetInstance() { static GpuVaContext instance; return instance; } - rocDecStatus Initialize(int device_id) { - if ( initialized_ && device_id != device_id_) { - CHECK_VAAPI(vaTerminate(va_display_)); - initialized_ = false; + rocDecStatus GetVaContext(int device_id, uint32_t *va_ctx_id) { + std::lock_guard lock(mutex); + bool found_existing = false; + uint32_t va_ctx_idx = 0; + if (!va_contexts_.empty()) { + for (va_ctx_idx = 0; va_ctx_idx < va_contexts_.size(); va_ctx_idx++) { + if (device_id == va_contexts_[va_ctx_idx].device_id) { + found_existing = true; + 
break; + } + } } - if (!initialized_) { - std::lock_guard lock(mutex); - device_id_ = device_id; + if (found_existing) { + *va_ctx_id = va_ctx_idx; + return ROCDEC_SUCCESS; + } else { + va_contexts_.resize(va_contexts_.size() + 1); + va_ctx_idx = va_contexts_.size() - 1; + + va_contexts_[va_ctx_idx].device_id = device_id; + va_contexts_[va_ctx_idx].drm_fd = -1; + va_contexts_[va_ctx_idx].va_display = 0; + va_contexts_[va_ctx_idx].num_dec_engines = 1; + va_contexts_[va_ctx_idx].va_profile = VAProfileNone; + va_contexts_[va_ctx_idx].config_attributes_probed = false; + rocDecStatus rocdec_status = ROCDEC_SUCCESS; - rocdec_status = InitHIP(device_id_); + rocdec_status = InitHIP(va_ctx_idx); if (rocdec_status != ROCDEC_SUCCESS) { ERR("Failed to initilize the HIP."); return rocdec_status; } - std::string gcn_arch_name = hip_dev_prop_.gcnArchName; + std::string gcn_arch_name = va_contexts_[va_ctx_idx].hip_dev_prop.gcnArchName; std::size_t pos = gcn_arch_name.find_first_of(":"); std::string gcn_arch_name_base = (pos != std::string::npos) ? gcn_arch_name.substr(0, pos) : gcn_arch_name; std::vector visible_devices; @@ -124,24 +145,24 @@ class GpuVaContext { GetCurrentComputePartition(current_compute_partitions); if (current_compute_partitions.empty()) { //if the current_compute_partitions is empty then the default SPX mode is assumed. 
- if (device_id_ < visible_devices.size()) { - offset = visible_devices[device_id_] * 7; + if (va_contexts_[va_ctx_idx].device_id < visible_devices.size()) { + offset = visible_devices[va_contexts_[va_ctx_idx].device_id] * 7; } else { - offset = device_id_ * 7; + offset = va_contexts_[va_ctx_idx].device_id * 7; } } else { - GetDrmNodeOffset(hip_dev_prop_.name, device_id_, visible_devices, current_compute_partitions, offset); + GetDrmNodeOffset(va_contexts_[va_ctx_idx].hip_dev_prop.name, va_contexts_[va_ctx_idx].device_id, visible_devices, current_compute_partitions, offset); } } std::string drm_node = "/dev/dri/renderD"; - if (device_id_ < visible_devices.size()) { - drm_node += std::to_string(128 + offset + visible_devices[device_id_]); + if (va_contexts_[va_ctx_idx].device_id < visible_devices.size()) { + drm_node += std::to_string(128 + offset + visible_devices[va_contexts_[va_ctx_idx].device_id]); } else { - drm_node += std::to_string(128 + offset + device_id_); + drm_node += std::to_string(128 + offset + va_contexts_[va_ctx_idx].device_id); } - rocdec_status = InitVAAPI(drm_node); + rocdec_status = InitVAAPI(va_ctx_idx, drm_node); if (rocdec_status != ROCDEC_SUCCESS) { ERR("Failed to initilize the VAAPI."); return rocdec_status; @@ -149,23 +170,34 @@ class GpuVaContext { amdgpu_device_handle dev_handle; uint32_t major_version = 0, minor_version = 0; - if (amdgpu_device_initialize(drm_fd_, &major_version, &minor_version, &dev_handle)) { + if (amdgpu_device_initialize(va_contexts_[va_ctx_idx].drm_fd, &major_version, &minor_version, &dev_handle)) { ERR("GPU device initialization failed: " + drm_node); return ROCDEC_DEVICE_INVALID; } - if (amdgpu_query_hw_ip_count(dev_handle, AMDGPU_HW_IP_VCN_DEC, &num_dec_engines_)) { + if (amdgpu_query_hw_ip_count(dev_handle, AMDGPU_HW_IP_VCN_DEC, &va_contexts_[va_ctx_idx].num_dec_engines)) { ERR("Failed to get the number of video decode engines."); } amdgpu_device_deinitialize(dev_handle); // Prob VA profiles - num_va_profiles_ 
= vaMaxNumProfiles(va_display_); - va_profile_list_.resize(num_va_profiles_); - CHECK_VAAPI(vaQueryConfigProfiles(va_display_, va_profile_list_.data(), &num_va_profiles_)); + va_contexts_[va_ctx_idx].num_va_profiles = vaMaxNumProfiles(va_contexts_[va_ctx_idx].va_display); + va_contexts_[va_ctx_idx].va_profile_list.resize(va_contexts_[va_ctx_idx].num_va_profiles); + CHECK_VAAPI(vaQueryConfigProfiles(va_contexts_[va_ctx_idx].va_display, va_contexts_[va_ctx_idx].va_profile_list.data(), &va_contexts_[va_ctx_idx].num_va_profiles)); - initialized_ = true; + *va_ctx_id = va_ctx_idx; + return ROCDEC_SUCCESS; + } + } + + rocDecStatus GetVaDisplay(uint32_t va_ctx_id, VADisplay *va_display) { + if (va_ctx_id >= va_contexts_.size()) { + ERR("Invalid VA context Id."); + *va_display = 0; + return ROCDEC_INVALID_PARAMETER; + } else { + *va_display = va_contexts_[va_ctx_id].va_display; + return ROCDEC_SUCCESS; } - return ROCDEC_SUCCESS; } rocDecStatus CheckDecCapForCodecType(RocdecDecodeCaps *dec_cap) { @@ -173,86 +205,89 @@ class GpuVaContext { ERR("Null decode capability struct pointer."); return ROCDEC_INVALID_PARAMETER; } - std::lock_guard lock(mutex); rocDecStatus rocdec_status = ROCDEC_SUCCESS; - if (!initialized_) { - rocdec_status = Initialize(dec_cap->device_id); - if (rocdec_status != ROCDEC_SUCCESS) { - ERR("Failed to initilize."); - return rocdec_status; - } + uint32_t va_ctx_id; + rocdec_status = GetVaContext(dec_cap->device_id, &va_ctx_id); + if (rocdec_status != ROCDEC_SUCCESS) { + ERR("Failed to initilize."); + return rocdec_status; } + std::lock_guard lock(mutex); dec_cap->is_supported = 1; // init value VAProfile va_profile = VAProfileNone; switch (dec_cap->codec_type) { - case rocDecVideoCodec_HEVC: + case rocDecVideoCodec_HEVC: { if (dec_cap->bit_depth_minus_8 == 0) { va_profile = VAProfileHEVCMain; } else if (dec_cap->bit_depth_minus_8 == 2) { va_profile = VAProfileHEVCMain10; } break; - case rocDecVideoCodec_AVC: + } + case rocDecVideoCodec_AVC: { va_profile 
= VAProfileH264Main; break; - case rocDecVideoCodec_VP9: + } + case rocDecVideoCodec_VP9: { if (dec_cap->bit_depth_minus_8 == 0) { va_profile = VAProfileVP9Profile0; } else if (dec_cap->bit_depth_minus_8 == 2) { va_profile = VAProfileVP9Profile2; } break; - case rocDecVideoCodec_AV1: + } + case rocDecVideoCodec_AV1: { #if VA_CHECK_VERSION(1,6,0) va_profile = VAProfileAV1Profile0; #else va_profile = static_cast(32); // VAProfileAV1Profile0; #endif break; - default: + } + default: { dec_cap->is_supported = 0; return ROCDEC_SUCCESS; + } } int i; - for (i = 0; i < num_va_profiles_; i++) { - if (va_profile_list_[i] == va_profile) { + for (i = 0; i < va_contexts_[va_ctx_id].num_va_profiles; i++) { + if (va_contexts_[va_ctx_id].va_profile_list[i] == va_profile) { break; } } - if (i == num_va_profiles_) { + if (i == va_contexts_[va_ctx_id].num_va_profiles) { dec_cap->is_supported = 0; return ROCDEC_SUCCESS; } // Check if the config attributes of the profile have been probed before - //if (config_attributes_probed_ == false) - if (va_profile != va_profile_ || config_attributes_probed_ == false) { - va_profile_ = va_profile; + if (va_profile != va_contexts_[va_ctx_id].va_profile || va_contexts_[va_ctx_id].config_attributes_probed == false) { + va_contexts_[va_ctx_id].va_profile = va_profile; VAConfigAttrib va_config_attrib; unsigned int attr_count; std::vector attr_list; va_config_attrib.type = VAConfigAttribRTFormat; - CHECK_VAAPI(vaGetConfigAttributes(va_display_, va_profile_, VAEntrypointVLD, &va_config_attrib, 1)); - rt_format_attrib_ = va_config_attrib.value; + CHECK_VAAPI(vaGetConfigAttributes(va_contexts_[va_ctx_id].va_display, va_contexts_[va_ctx_id].va_profile, VAEntrypointVLD, &va_config_attrib, 1)); + va_contexts_[va_ctx_id].rt_format_attrib = va_config_attrib.value; - CHECK_VAAPI(vaCreateConfig(va_display_, va_profile_, VAEntrypointVLD, &va_config_attrib, 1, &va_config_id_)); - CHECK_VAAPI(vaQuerySurfaceAttributes(va_display_, va_config_id_, 0, &attr_count)); + 
CHECK_VAAPI(vaCreateConfig(va_contexts_[va_ctx_id].va_display, va_contexts_[va_ctx_id].va_profile, VAEntrypointVLD, &va_config_attrib, 1, &va_contexts_[va_ctx_id].va_config_id)); + CHECK_VAAPI(vaQuerySurfaceAttributes(va_contexts_[va_ctx_id].va_display, va_contexts_[va_ctx_id].va_config_id, 0, &attr_count)); attr_list.resize(attr_count); - CHECK_VAAPI(vaQuerySurfaceAttributes(va_display_, va_config_id_, attr_list.data(), &attr_count)); - output_format_mask_ = 0; - CHECK_VAAPI(vaDestroyConfig(va_display_, va_config_id_)); + CHECK_VAAPI(vaQuerySurfaceAttributes(va_contexts_[va_ctx_id].va_display, va_contexts_[va_ctx_id].va_config_id, attr_list.data(), &attr_count)); + va_contexts_[va_ctx_id].output_format_mask = 0; + CHECK_VAAPI(vaDestroyConfig(va_contexts_[va_ctx_id].va_display, va_contexts_[va_ctx_id].va_config_id)); for (int k = 0; k < attr_count; k++) { switch (attr_list[k].type) { case VASurfaceAttribPixelFormat: { switch (attr_list[k].value.value.i) { case VA_FOURCC_NV12: - output_format_mask_ |= 1 << rocDecVideoSurfaceFormat_NV12; + va_contexts_[va_ctx_id].output_format_mask |= 1 << rocDecVideoSurfaceFormat_NV12; break; case VA_FOURCC_P016: - output_format_mask_ |= 1 << rocDecVideoSurfaceFormat_P016; + va_contexts_[va_ctx_id].output_format_mask |= 1 << rocDecVideoSurfaceFormat_P016; break; default: break; @@ -260,49 +295,49 @@ class GpuVaContext { } break; case VASurfaceAttribMinWidth: - min_width_ = attr_list[k].value.value.i; + va_contexts_[va_ctx_id].min_width = attr_list[k].value.value.i; break; case VASurfaceAttribMinHeight: - min_height_ = attr_list[k].value.value.i; + va_contexts_[va_ctx_id].min_height = attr_list[k].value.value.i; break; case VASurfaceAttribMaxWidth: - max_width_ = attr_list[k].value.value.i; + va_contexts_[va_ctx_id].max_width = attr_list[k].value.value.i; break; case VASurfaceAttribMaxHeight: - max_height_ = attr_list[k].value.value.i; + va_contexts_[va_ctx_id].max_height = attr_list[k].value.value.i; break; default: break; } } - 
config_attributes_probed_ = true; + va_contexts_[va_ctx_id].config_attributes_probed = true; } // Check chroma format switch (dec_cap->chroma_format) { case rocDecVideoChromaFormat_Monochrome: { - if ((rt_format_attrib_ & VA_RT_FORMAT_YUV400) == 0) { + if ((va_contexts_[va_ctx_id].rt_format_attrib & VA_RT_FORMAT_YUV400) == 0) { dec_cap->is_supported = 0; return ROCDEC_SUCCESS; } break; } case rocDecVideoChromaFormat_420: { - if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV420 | VA_RT_FORMAT_YUV420_10 | VA_RT_FORMAT_YUV420_12)) == 0) { + if ((va_contexts_[va_ctx_id].rt_format_attrib & (VA_RT_FORMAT_YUV420 | VA_RT_FORMAT_YUV420_10 | VA_RT_FORMAT_YUV420_12)) == 0) { dec_cap->is_supported = 0; return ROCDEC_SUCCESS; } break; } case rocDecVideoChromaFormat_422: { - if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV422_10 | VA_RT_FORMAT_YUV422_12)) == 0) { + if ((va_contexts_[va_ctx_id].rt_format_attrib & (VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV422_10 | VA_RT_FORMAT_YUV422_12)) == 0) { dec_cap->is_supported = 0; return ROCDEC_SUCCESS; } break; } case rocDecVideoChromaFormat_444: { - if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV444 | VA_RT_FORMAT_YUV444_10 | VA_RT_FORMAT_YUV444_12)) == 0) { + if ((va_contexts_[va_ctx_id].rt_format_attrib & (VA_RT_FORMAT_YUV444 | VA_RT_FORMAT_YUV444_10 | VA_RT_FORMAT_YUV444_12)) == 0) { dec_cap->is_supported = 0; return ROCDEC_SUCCESS; } @@ -316,21 +351,21 @@ class GpuVaContext { // Check bit depth switch (dec_cap->bit_depth_minus_8) { case 0: { - if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV420 | VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV444 | VA_RT_FORMAT_YUV400)) == 0) { + if ((va_contexts_[va_ctx_id].rt_format_attrib & (VA_RT_FORMAT_YUV420 | VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV444 | VA_RT_FORMAT_YUV400)) == 0) { dec_cap->is_supported = 0; return ROCDEC_SUCCESS; } break; } case 2: { - if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV420_10 | VA_RT_FORMAT_YUV422_10 | VA_RT_FORMAT_YUV444_10)) == 0) { + if 
((va_contexts_[va_ctx_id].rt_format_attrib & (VA_RT_FORMAT_YUV420_10 | VA_RT_FORMAT_YUV422_10 | VA_RT_FORMAT_YUV444_10)) == 0) { dec_cap->is_supported = 0; return ROCDEC_SUCCESS; } break; } case 4: { - if ((rt_format_attrib_ & (VA_RT_FORMAT_YUV420_12 | VA_RT_FORMAT_YUV422_12 | VA_RT_FORMAT_YUV444_12)) == 0) { + if ((va_contexts_[va_ctx_id].rt_format_attrib & (VA_RT_FORMAT_YUV420_12 | VA_RT_FORMAT_YUV422_12 | VA_RT_FORMAT_YUV444_12)) == 0) { dec_cap->is_supported = 0; return ROCDEC_SUCCESS; } @@ -342,60 +377,60 @@ class GpuVaContext { } } - dec_cap->num_decoders = num_dec_engines_; - dec_cap->output_format_mask = output_format_mask_; - dec_cap->max_width = max_width_; - dec_cap->max_height = max_height_; - dec_cap->min_width = min_width_; - dec_cap->min_height = min_height_; + dec_cap->num_decoders = va_contexts_[va_ctx_id].num_dec_engines; + dec_cap->output_format_mask = va_contexts_[va_ctx_id].output_format_mask; + dec_cap->max_width = va_contexts_[va_ctx_id].max_width; + dec_cap->max_height = va_contexts_[va_ctx_id].max_height; + dec_cap->min_width = va_contexts_[va_ctx_id].min_width; + dec_cap->min_height = va_contexts_[va_ctx_id].min_height; return ROCDEC_SUCCESS; } private: - bool initialized_; std::mutex mutex; - bool config_attributes_probed_; - GpuVaContext() : initialized_{false}, drm_fd_{-1}, va_display_{0}, num_dec_engines_{1}, va_profile_{VAProfileNone}, config_attributes_probed_{false} {}; + GpuVaContext() {}; GpuVaContext(const GpuVaContext&) = delete; GpuVaContext& operator = (const GpuVaContext) = delete; ~GpuVaContext() { - if (va_display_) { - if (vaTerminate(va_display_) != VA_STATUS_SUCCESS) { - ERR("Failed to termiate VA"); + for (int i = 0; i < va_contexts_.size(); i++) { + if (va_contexts_[i].va_display) { + if (vaTerminate(va_contexts_[i].va_display) != VA_STATUS_SUCCESS) { + ERR("Failed to termiate VA"); + } } } }; - rocDecStatus InitHIP(int device_id) { - CHECK_HIP(hipGetDeviceCount(&num_devices_)); - if (num_devices_ < 1) { + rocDecStatus 
InitHIP(int va_ctx_idx) { + CHECK_HIP(hipGetDeviceCount(&va_contexts_[va_ctx_idx].num_devices)); + if (va_contexts_[va_ctx_idx].num_devices < 1) { ERR("Didn't find any GPU."); return ROCDEC_DEVICE_INVALID; } - if (device_id >= num_devices_) { + if (va_contexts_[va_ctx_idx].device_id >= va_contexts_[va_ctx_idx].num_devices) { ERR("ERROR: the requested device_id is not found! "); return ROCDEC_DEVICE_INVALID; } - CHECK_HIP(hipSetDevice(device_id)); - CHECK_HIP(hipGetDeviceProperties(&hip_dev_prop_, device_id)); + CHECK_HIP(hipSetDevice(va_contexts_[va_ctx_idx].device_id)); + CHECK_HIP(hipGetDeviceProperties(&va_contexts_[va_ctx_idx].hip_dev_prop, va_contexts_[va_ctx_idx].device_id)); return ROCDEC_SUCCESS; } - rocDecStatus InitVAAPI(std::string drm_node) { - drm_fd_ = open(drm_node.c_str(), O_RDWR); - if (drm_fd_ < 0) { + rocDecStatus InitVAAPI(int va_ctx_idx, std::string drm_node) { + va_contexts_[va_ctx_idx].drm_fd = open(drm_node.c_str(), O_RDWR); + if (va_contexts_[va_ctx_idx].drm_fd < 0) { ERR("Failed to open drm node." 
+ drm_node); return ROCDEC_NOT_INITIALIZED; } - va_display_ = vaGetDisplayDRM(drm_fd_); - if (!va_display_) { - ERR("Failed to create va_display_."); + va_contexts_[va_ctx_idx].va_display = vaGetDisplayDRM(va_contexts_[va_ctx_idx].drm_fd); + if (!va_contexts_[va_ctx_idx].va_display) { + ERR("Failed to create VA display."); return ROCDEC_NOT_INITIALIZED; } - vaSetInfoCallback(va_display_, NULL, NULL); + vaSetInfoCallback(va_contexts_[va_ctx_idx].va_display, NULL, NULL); int major_version = 0, minor_version = 0; - CHECK_VAAPI(vaInitialize(va_display_, &major_version, &minor_version)); + CHECK_VAAPI(vaInitialize(va_contexts_[va_ctx_idx].va_display, &major_version, &minor_version)); return ROCDEC_SUCCESS; } diff --git a/src/rocdecode/vaapi/vaapi_videodecoder.cpp b/src/rocdecode/vaapi/vaapi_videodecoder.cpp index bb3aa8d8..8d7ec523 100644 --- a/src/rocdecode/vaapi/vaapi_videodecoder.cpp +++ b/src/rocdecode/vaapi/vaapi_videodecoder.cpp @@ -79,8 +79,15 @@ rocDecStatus VaapiVideoDecoder::InitializeDecoder(std::string device_name, std:: } GpuVaContext& va_ctx = GpuVaContext::GetInstance(); - va_ctx.Initialize(decoder_create_info_.device_id); - va_display_ = va_ctx.va_display_; + uint32_t va_ctx_id; + if ((rocdec_status = va_ctx.GetVaContext(decoder_create_info_.device_id, &va_ctx_id)) != ROCDEC_SUCCESS) { + ERR("Failed to get VA context."); + return rocdec_status; + } + if ((rocdec_status = va_ctx.GetVaDisplay(va_ctx_id, &va_display_)) != ROCDEC_SUCCESS) { + ERR("Failed to get VA display."); + return rocdec_status; + } rocdec_status = CreateDecoderConfig(); if (rocdec_status != ROCDEC_SUCCESS) { ERR("Failed to create a VAAPI decoder configuration."); diff --git a/src/rocdecode/vaapi/vaapi_videodecoder.h b/src/rocdecode/vaapi/vaapi_videodecoder.h index 96196f57..ec6399c7 100644 --- a/src/rocdecode/vaapi/vaapi_videodecoder.h +++ b/src/rocdecode/vaapi/vaapi_videodecoder.h @@ -47,6 +47,8 @@ THE SOFTWARE. 
#include "../../../api/rocdecode.h" #include "rocdecode_va_context.h" +#define INIT_SLICE_PARAM_LIST_NUM 16 // initial slice parameter buffer list size + class VaapiVideoDecoder { public: VaapiVideoDecoder(RocDecoderCreateInfo &decoder_create_info); From b61ec92bea3e82954fde833d0f2727ca98ba3fac Mon Sep 17 00:00:00 2001 From: Jeff Jiang Date: Thu, 26 Dec 2024 11:56:09 -0500 Subject: [PATCH 07/10] * rocDecode/HW cap: Moved GpuVaContext class implementation back to VAAPI layer. --- src/rocdecode/rocdecode_va_context.h | 536 --------------------- src/rocdecode/vaapi/vaapi_videodecoder.cpp | 428 ++++++++++++++++ src/rocdecode/vaapi/vaapi_videodecoder.h | 74 ++- 3 files changed, 501 insertions(+), 537 deletions(-) delete mode 100644 src/rocdecode/rocdecode_va_context.h diff --git a/src/rocdecode/rocdecode_va_context.h b/src/rocdecode/rocdecode_va_context.h deleted file mode 100644 index 8d35711e..00000000 --- a/src/rocdecode/rocdecode_va_context.h +++ /dev/null @@ -1,536 +0,0 @@ -/* -Copyright (c) 2023 - 2024 Advanced Micro Devices, Inc. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. -*/ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if __cplusplus >= 201703L && __has_include() - #include - namespace fs = std::filesystem; -#else - #include - namespace fs = std::experimental::filesystem; -#endif -#include -#include -#include -#include -#include -#include "../../commons.h" -#include "../../../api/rocdecode.h" - -#define CHECK_HIP(call) {\ - hipError_t hip_status = call;\ - if (hip_status != hipSuccess) {\ - std::cout << "HIP failure: " << #call << " failed with 'status: " << hipGetErrorName(hip_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\ - return ROCDEC_RUNTIME_ERROR;\ - }\ -} - -#define CHECK_VAAPI(call) {\ - VAStatus va_status = call;\ - if (va_status != VA_STATUS_SUCCESS) {\ - std::cout << "VAAPI failure: " << #call << " failed with status: " << std::hex << "0x" << va_status << std::dec << " = '" << vaErrorStr(va_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\ - return ROCDEC_RUNTIME_ERROR;\ - }\ -} - -typedef enum { - kSpx = 0, // Single Partition Accelerator - kDpx = 1, // Dual Partition Accelerator - kTpx = 2, // Triple Partition Accelerator - kQpx = 3, // Quad Partition Accelerator - kCpx = 4, // Core Partition Accelerator -} ComputePartition; - -typedef struct { - int num_devices; - int device_id; - int drm_fd; - VADisplay va_display; - hipDeviceProp_t hip_dev_prop; - uint32_t num_dec_engines; - int num_va_profiles; - std::vector va_profile_list; // supported profiles by the current GPU - VAProfile va_profile; // current profile used - VAConfigID va_config_id; - bool config_attributes_probed; - uint32_t rt_format_attrib; - uint32_t output_format_mask; - uint32_t max_width; - 
uint32_t max_height; - uint32_t min_width; - uint32_t min_height; -} VaContextInfo; - -// The GpuVaContext singleton class providing access to the the GPU VA services -class GpuVaContext { -public: - std::vector va_contexts_; - - static GpuVaContext& GetInstance() { - static GpuVaContext instance; - return instance; - } - - rocDecStatus GetVaContext(int device_id, uint32_t *va_ctx_id) { - std::lock_guard lock(mutex); - bool found_existing = false; - uint32_t va_ctx_idx = 0; - if (!va_contexts_.empty()) { - for (va_ctx_idx = 0; va_ctx_idx < va_contexts_.size(); va_ctx_idx++) { - if (device_id == va_contexts_[va_ctx_idx].device_id) { - found_existing = true; - break; - } - } - } - if (found_existing) { - *va_ctx_id = va_ctx_idx; - return ROCDEC_SUCCESS; - } else { - va_contexts_.resize(va_contexts_.size() + 1); - va_ctx_idx = va_contexts_.size() - 1; - - va_contexts_[va_ctx_idx].device_id = device_id; - va_contexts_[va_ctx_idx].drm_fd = -1; - va_contexts_[va_ctx_idx].va_display = 0; - va_contexts_[va_ctx_idx].num_dec_engines = 1; - va_contexts_[va_ctx_idx].va_profile = VAProfileNone; - va_contexts_[va_ctx_idx].config_attributes_probed = false; - - rocDecStatus rocdec_status = ROCDEC_SUCCESS; - rocdec_status = InitHIP(va_ctx_idx); - if (rocdec_status != ROCDEC_SUCCESS) { - ERR("Failed to initilize the HIP."); - return rocdec_status; - } - - std::string gcn_arch_name = va_contexts_[va_ctx_idx].hip_dev_prop.gcnArchName; - std::size_t pos = gcn_arch_name.find_first_of(":"); - std::string gcn_arch_name_base = (pos != std::string::npos) ? gcn_arch_name.substr(0, pos) : gcn_arch_name; - std::vector visible_devices; - GetVisibleDevices(visible_devices); - - int offset = 0; - if (gcn_arch_name_base.compare("gfx942") == 0) { - std::vector current_compute_partitions; - GetCurrentComputePartition(current_compute_partitions); - if (current_compute_partitions.empty()) { - //if the current_compute_partitions is empty then the default SPX mode is assumed. 
- if (va_contexts_[va_ctx_idx].device_id < visible_devices.size()) { - offset = visible_devices[va_contexts_[va_ctx_idx].device_id] * 7; - } else { - offset = va_contexts_[va_ctx_idx].device_id * 7; - } - } else { - GetDrmNodeOffset(va_contexts_[va_ctx_idx].hip_dev_prop.name, va_contexts_[va_ctx_idx].device_id, visible_devices, current_compute_partitions, offset); - } - } - - std::string drm_node = "/dev/dri/renderD"; - if (va_contexts_[va_ctx_idx].device_id < visible_devices.size()) { - drm_node += std::to_string(128 + offset + visible_devices[va_contexts_[va_ctx_idx].device_id]); - } else { - drm_node += std::to_string(128 + offset + va_contexts_[va_ctx_idx].device_id); - } - - rocdec_status = InitVAAPI(va_ctx_idx, drm_node); - if (rocdec_status != ROCDEC_SUCCESS) { - ERR("Failed to initilize the VAAPI."); - return rocdec_status; - } - - amdgpu_device_handle dev_handle; - uint32_t major_version = 0, minor_version = 0; - if (amdgpu_device_initialize(va_contexts_[va_ctx_idx].drm_fd, &major_version, &minor_version, &dev_handle)) { - ERR("GPU device initialization failed: " + drm_node); - return ROCDEC_DEVICE_INVALID; - } - if (amdgpu_query_hw_ip_count(dev_handle, AMDGPU_HW_IP_VCN_DEC, &va_contexts_[va_ctx_idx].num_dec_engines)) { - ERR("Failed to get the number of video decode engines."); - } - amdgpu_device_deinitialize(dev_handle); - - // Prob VA profiles - va_contexts_[va_ctx_idx].num_va_profiles = vaMaxNumProfiles(va_contexts_[va_ctx_idx].va_display); - va_contexts_[va_ctx_idx].va_profile_list.resize(va_contexts_[va_ctx_idx].num_va_profiles); - CHECK_VAAPI(vaQueryConfigProfiles(va_contexts_[va_ctx_idx].va_display, va_contexts_[va_ctx_idx].va_profile_list.data(), &va_contexts_[va_ctx_idx].num_va_profiles)); - - *va_ctx_id = va_ctx_idx; - return ROCDEC_SUCCESS; - } - } - - rocDecStatus GetVaDisplay(uint32_t va_ctx_id, VADisplay *va_display) { - if (va_ctx_id >= va_contexts_.size()) { - ERR("Invalid VA context Id."); - *va_display = 0; - return 
ROCDEC_INVALID_PARAMETER; - } else { - *va_display = va_contexts_[va_ctx_id].va_display; - return ROCDEC_SUCCESS; - } - } - - rocDecStatus CheckDecCapForCodecType(RocdecDecodeCaps *dec_cap) { - if (dec_cap == nullptr) { - ERR("Null decode capability struct pointer."); - return ROCDEC_INVALID_PARAMETER; - } - rocDecStatus rocdec_status = ROCDEC_SUCCESS; - uint32_t va_ctx_id; - rocdec_status = GetVaContext(dec_cap->device_id, &va_ctx_id); - if (rocdec_status != ROCDEC_SUCCESS) { - ERR("Failed to initilize."); - return rocdec_status; - } - - std::lock_guard lock(mutex); - dec_cap->is_supported = 1; // init value - VAProfile va_profile = VAProfileNone; - switch (dec_cap->codec_type) { - case rocDecVideoCodec_HEVC: { - if (dec_cap->bit_depth_minus_8 == 0) { - va_profile = VAProfileHEVCMain; - } else if (dec_cap->bit_depth_minus_8 == 2) { - va_profile = VAProfileHEVCMain10; - } - break; - } - case rocDecVideoCodec_AVC: { - va_profile = VAProfileH264Main; - break; - } - case rocDecVideoCodec_VP9: { - if (dec_cap->bit_depth_minus_8 == 0) { - va_profile = VAProfileVP9Profile0; - } else if (dec_cap->bit_depth_minus_8 == 2) { - va_profile = VAProfileVP9Profile2; - } - break; - } - case rocDecVideoCodec_AV1: { - #if VA_CHECK_VERSION(1,6,0) - va_profile = VAProfileAV1Profile0; - #else - va_profile = static_cast(32); // VAProfileAV1Profile0; - #endif - break; - } - default: { - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - } - - int i; - for (i = 0; i < va_contexts_[va_ctx_id].num_va_profiles; i++) { - if (va_contexts_[va_ctx_id].va_profile_list[i] == va_profile) { - break; - } - } - if (i == va_contexts_[va_ctx_id].num_va_profiles) { - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - - // Check if the config attributes of the profile have been probed before - if (va_profile != va_contexts_[va_ctx_id].va_profile || va_contexts_[va_ctx_id].config_attributes_probed == false) { - va_contexts_[va_ctx_id].va_profile = va_profile; - - VAConfigAttrib 
va_config_attrib; - unsigned int attr_count; - std::vector attr_list; - va_config_attrib.type = VAConfigAttribRTFormat; - CHECK_VAAPI(vaGetConfigAttributes(va_contexts_[va_ctx_id].va_display, va_contexts_[va_ctx_id].va_profile, VAEntrypointVLD, &va_config_attrib, 1)); - va_contexts_[va_ctx_id].rt_format_attrib = va_config_attrib.value; - - CHECK_VAAPI(vaCreateConfig(va_contexts_[va_ctx_id].va_display, va_contexts_[va_ctx_id].va_profile, VAEntrypointVLD, &va_config_attrib, 1, &va_contexts_[va_ctx_id].va_config_id)); - CHECK_VAAPI(vaQuerySurfaceAttributes(va_contexts_[va_ctx_id].va_display, va_contexts_[va_ctx_id].va_config_id, 0, &attr_count)); - attr_list.resize(attr_count); - CHECK_VAAPI(vaQuerySurfaceAttributes(va_contexts_[va_ctx_id].va_display, va_contexts_[va_ctx_id].va_config_id, attr_list.data(), &attr_count)); - va_contexts_[va_ctx_id].output_format_mask = 0; - CHECK_VAAPI(vaDestroyConfig(va_contexts_[va_ctx_id].va_display, va_contexts_[va_ctx_id].va_config_id)); - for (int k = 0; k < attr_count; k++) { - switch (attr_list[k].type) { - case VASurfaceAttribPixelFormat: { - switch (attr_list[k].value.value.i) { - case VA_FOURCC_NV12: - va_contexts_[va_ctx_id].output_format_mask |= 1 << rocDecVideoSurfaceFormat_NV12; - break; - case VA_FOURCC_P016: - va_contexts_[va_ctx_id].output_format_mask |= 1 << rocDecVideoSurfaceFormat_P016; - break; - default: - break; - } - } - break; - case VASurfaceAttribMinWidth: - va_contexts_[va_ctx_id].min_width = attr_list[k].value.value.i; - break; - case VASurfaceAttribMinHeight: - va_contexts_[va_ctx_id].min_height = attr_list[k].value.value.i; - break; - case VASurfaceAttribMaxWidth: - va_contexts_[va_ctx_id].max_width = attr_list[k].value.value.i; - break; - case VASurfaceAttribMaxHeight: - va_contexts_[va_ctx_id].max_height = attr_list[k].value.value.i; - break; - default: - break; - } - } - va_contexts_[va_ctx_id].config_attributes_probed = true; - } - - // Check chroma format - switch (dec_cap->chroma_format) { - case 
rocDecVideoChromaFormat_Monochrome: { - if ((va_contexts_[va_ctx_id].rt_format_attrib & VA_RT_FORMAT_YUV400) == 0) { - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - break; - } - case rocDecVideoChromaFormat_420: { - if ((va_contexts_[va_ctx_id].rt_format_attrib & (VA_RT_FORMAT_YUV420 | VA_RT_FORMAT_YUV420_10 | VA_RT_FORMAT_YUV420_12)) == 0) { - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - break; - } - case rocDecVideoChromaFormat_422: { - if ((va_contexts_[va_ctx_id].rt_format_attrib & (VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV422_10 | VA_RT_FORMAT_YUV422_12)) == 0) { - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - break; - } - case rocDecVideoChromaFormat_444: { - if ((va_contexts_[va_ctx_id].rt_format_attrib & (VA_RT_FORMAT_YUV444 | VA_RT_FORMAT_YUV444_10 | VA_RT_FORMAT_YUV444_12)) == 0) { - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - break; - } - default: { - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - } - // Check bit depth - switch (dec_cap->bit_depth_minus_8) { - case 0: { - if ((va_contexts_[va_ctx_id].rt_format_attrib & (VA_RT_FORMAT_YUV420 | VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV444 | VA_RT_FORMAT_YUV400)) == 0) { - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - break; - } - case 2: { - if ((va_contexts_[va_ctx_id].rt_format_attrib & (VA_RT_FORMAT_YUV420_10 | VA_RT_FORMAT_YUV422_10 | VA_RT_FORMAT_YUV444_10)) == 0) { - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - break; - } - case 4: { - if ((va_contexts_[va_ctx_id].rt_format_attrib & (VA_RT_FORMAT_YUV420_12 | VA_RT_FORMAT_YUV422_12 | VA_RT_FORMAT_YUV444_12)) == 0) { - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - break; - } - default: { - dec_cap->is_supported = 0; - return ROCDEC_SUCCESS; - } - } - - dec_cap->num_decoders = va_contexts_[va_ctx_id].num_dec_engines; - dec_cap->output_format_mask = va_contexts_[va_ctx_id].output_format_mask; - dec_cap->max_width = va_contexts_[va_ctx_id].max_width; - 
dec_cap->max_height = va_contexts_[va_ctx_id].max_height; - dec_cap->min_width = va_contexts_[va_ctx_id].min_width; - dec_cap->min_height = va_contexts_[va_ctx_id].min_height; - return ROCDEC_SUCCESS; - } - -private: - std::mutex mutex; - - GpuVaContext() {}; - GpuVaContext(const GpuVaContext&) = delete; - GpuVaContext& operator = (const GpuVaContext) = delete; - ~GpuVaContext() { - for (int i = 0; i < va_contexts_.size(); i++) { - if (va_contexts_[i].va_display) { - if (vaTerminate(va_contexts_[i].va_display) != VA_STATUS_SUCCESS) { - ERR("Failed to termiate VA"); - } - } - } - }; - - rocDecStatus InitHIP(int va_ctx_idx) { - CHECK_HIP(hipGetDeviceCount(&va_contexts_[va_ctx_idx].num_devices)); - if (va_contexts_[va_ctx_idx].num_devices < 1) { - ERR("Didn't find any GPU."); - return ROCDEC_DEVICE_INVALID; - } - if (va_contexts_[va_ctx_idx].device_id >= va_contexts_[va_ctx_idx].num_devices) { - ERR("ERROR: the requested device_id is not found! "); - return ROCDEC_DEVICE_INVALID; - } - CHECK_HIP(hipSetDevice(va_contexts_[va_ctx_idx].device_id)); - CHECK_HIP(hipGetDeviceProperties(&va_contexts_[va_ctx_idx].hip_dev_prop, va_contexts_[va_ctx_idx].device_id)); - return ROCDEC_SUCCESS; - } - - rocDecStatus InitVAAPI(int va_ctx_idx, std::string drm_node) { - va_contexts_[va_ctx_idx].drm_fd = open(drm_node.c_str(), O_RDWR); - if (va_contexts_[va_ctx_idx].drm_fd < 0) { - ERR("Failed to open drm node." 
+ drm_node); - return ROCDEC_NOT_INITIALIZED; - } - va_contexts_[va_ctx_idx].va_display = vaGetDisplayDRM(va_contexts_[va_ctx_idx].drm_fd); - if (!va_contexts_[va_ctx_idx].va_display) { - ERR("Failed to create VA display."); - return ROCDEC_NOT_INITIALIZED; - } - vaSetInfoCallback(va_contexts_[va_ctx_idx].va_display, NULL, NULL); - int major_version = 0, minor_version = 0; - CHECK_VAAPI(vaInitialize(va_contexts_[va_ctx_idx].va_display, &major_version, &minor_version)); - return ROCDEC_SUCCESS; - } - - void GetVisibleDevices(std::vector& visible_devices_vetor) { - char *visible_devices = std::getenv("HIP_VISIBLE_DEVICES"); - if (visible_devices != nullptr) { - char *token = std::strtok(visible_devices,","); - while (token != nullptr) { - visible_devices_vetor.push_back(std::atoi(token)); - token = std::strtok(nullptr,","); - } - std::sort(visible_devices_vetor.begin(), visible_devices_vetor.end()); - } - } - - void GetCurrentComputePartition(std::vector &current_compute_partitions) { - std::string search_path = "/sys/devices/"; - std::string partition_file = "current_compute_partition"; - std::error_code ec; - if (fs::exists(search_path)) { - for (auto it = fs::recursive_directory_iterator(search_path, fs::directory_options::skip_permission_denied); it != fs::recursive_directory_iterator(); ) { - try { - if (it->path().filename() == partition_file) { - std::ifstream file(it->path()); - if (file.is_open()) { - std::string partition; - std::getline(file, partition); - if (partition.compare("SPX") == 0 || partition.compare("spx") == 0) { - current_compute_partitions.push_back(kSpx); - } else if (partition.compare("DPX") == 0 || partition.compare("dpx") == 0) { - current_compute_partitions.push_back(kDpx); - } else if (partition.compare("TPX") == 0 || partition.compare("tpx") == 0) { - current_compute_partitions.push_back(kTpx); - } else if (partition.compare("QPX") == 0 || partition.compare("qpx") == 0) { - current_compute_partitions.push_back(kQpx); - } else if 
(partition.compare("CPX") == 0 || partition.compare("cpx") == 0) { - current_compute_partitions.push_back(kCpx); - } - file.close(); - } - } - ++it; - } catch (fs::filesystem_error& e) { - it.increment(ec); - } - } - } - } - - void GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector& visible_devices, std::vector &current_compute_partitions, int &offset) { - if (!current_compute_partitions.empty()) { - switch (current_compute_partitions[0]) { - case kSpx: - if (device_id < visible_devices.size()) { - offset = visible_devices[device_id] * 7; - } else { - offset = device_id * 7; - } - break; - case kDpx: - if (device_id < visible_devices.size()) { - offset = (visible_devices[device_id] / 2) * 6; - } else { - offset = (device_id / 2) * 6; - } - break; - case kTpx: - // Please note that although there are only 6 XCCs per socket on MI300A, - // there are two dummy render nodes added by the driver. - // This needs to be taken into account when creating drm_node on each socket in TPX mode. - if (device_id < visible_devices.size()) { - offset = (visible_devices[device_id] / 3) * 5; - } else { - offset = (device_id / 3) * 5; - } - break; - case kQpx: - if (device_id < visible_devices.size()) { - offset = (visible_devices[device_id] / 4) * 4; - } else { - offset = (device_id / 4) * 4; - } - break; - case kCpx: - // Please note that both MI300A and MI300X have the same gfx_arch_name which is - // gfx942. Therefore we cannot use the gfx942 to identify MI300A. - // instead use the device name and look for MI300A - // Also, as explained aboe in the TPX mode section, we need to be taken into account - // the extra two dummy nodes when creating drm_node on each socket in CPX mode as well. 
- std::string mi300a = "MI300A"; - size_t found_mi300a = device_name.find(mi300a); - if (found_mi300a != std::string::npos) { - if (device_id < visible_devices.size()) { - offset = (visible_devices[device_id] / 6) * 2; - } else { - offset = (device_id / 6) * 2; - } - } - break; - } - } - } -}; \ No newline at end of file diff --git a/src/rocdecode/vaapi/vaapi_videodecoder.cpp b/src/rocdecode/vaapi/vaapi_videodecoder.cpp index 8d7ec523..ffdb0e48 100644 --- a/src/rocdecode/vaapi/vaapi_videodecoder.cpp +++ b/src/rocdecode/vaapi/vaapi_videodecoder.cpp @@ -501,4 +501,432 @@ rocDecStatus VaapiVideoDecoder::SyncSurface(int pic_idx) { CHECK_VAAPI(vaSyncSurface(va_display_, va_surface_ids_[pic_idx])); } return ROCDEC_SUCCESS; +} + +GpuVaContext::~GpuVaContext() { + for (int i = 0; i < va_contexts_.size(); i++) { + if (va_contexts_[i].va_display) { + if (vaTerminate(va_contexts_[i].va_display) != VA_STATUS_SUCCESS) { + ERR("Failed to termiate VA"); + } + } + } +}; + +rocDecStatus GpuVaContext::GetVaContext(int device_id, uint32_t *va_ctx_id) { + std::lock_guard lock(mutex); + bool found_existing = false; + uint32_t va_ctx_idx = 0; + if (!va_contexts_.empty()) { + for (va_ctx_idx = 0; va_ctx_idx < va_contexts_.size(); va_ctx_idx++) { + if (device_id == va_contexts_[va_ctx_idx].device_id) { + found_existing = true; + break; + } + } + } + if (found_existing) { + *va_ctx_id = va_ctx_idx; + return ROCDEC_SUCCESS; + } else { + va_contexts_.resize(va_contexts_.size() + 1); + va_ctx_idx = va_contexts_.size() - 1; + + va_contexts_[va_ctx_idx].device_id = device_id; + va_contexts_[va_ctx_idx].drm_fd = -1; + va_contexts_[va_ctx_idx].va_display = 0; + va_contexts_[va_ctx_idx].num_dec_engines = 1; + va_contexts_[va_ctx_idx].va_profile = VAProfileNone; + va_contexts_[va_ctx_idx].config_attributes_probed = false; + + rocDecStatus rocdec_status = ROCDEC_SUCCESS; + rocdec_status = InitHIP(va_ctx_idx); + if (rocdec_status != ROCDEC_SUCCESS) { + ERR("Failed to initilize the HIP."); + return 
rocdec_status; + } + + std::string gcn_arch_name = va_contexts_[va_ctx_idx].hip_dev_prop.gcnArchName; + std::size_t pos = gcn_arch_name.find_first_of(":"); + std::string gcn_arch_name_base = (pos != std::string::npos) ? gcn_arch_name.substr(0, pos) : gcn_arch_name; + std::vector visible_devices; + GetVisibleDevices(visible_devices); + + int offset = 0; + if (gcn_arch_name_base.compare("gfx942") == 0) { + std::vector current_compute_partitions; + GetCurrentComputePartition(current_compute_partitions); + if (current_compute_partitions.empty()) { + //if the current_compute_partitions is empty then the default SPX mode is assumed. + if (va_contexts_[va_ctx_idx].device_id < visible_devices.size()) { + offset = visible_devices[va_contexts_[va_ctx_idx].device_id] * 7; + } else { + offset = va_contexts_[va_ctx_idx].device_id * 7; + } + } else { + GetDrmNodeOffset(va_contexts_[va_ctx_idx].hip_dev_prop.name, va_contexts_[va_ctx_idx].device_id, visible_devices, current_compute_partitions, offset); + } + } + + std::string drm_node = "/dev/dri/renderD"; + if (va_contexts_[va_ctx_idx].device_id < visible_devices.size()) { + drm_node += std::to_string(128 + offset + visible_devices[va_contexts_[va_ctx_idx].device_id]); + } else { + drm_node += std::to_string(128 + offset + va_contexts_[va_ctx_idx].device_id); + } + + rocdec_status = InitVAAPI(va_ctx_idx, drm_node); + if (rocdec_status != ROCDEC_SUCCESS) { + ERR("Failed to initilize the VAAPI."); + return rocdec_status; + } + + amdgpu_device_handle dev_handle; + uint32_t major_version = 0, minor_version = 0; + if (amdgpu_device_initialize(va_contexts_[va_ctx_idx].drm_fd, &major_version, &minor_version, &dev_handle)) { + ERR("GPU device initialization failed: " + drm_node); + return ROCDEC_DEVICE_INVALID; + } + if (amdgpu_query_hw_ip_count(dev_handle, AMDGPU_HW_IP_VCN_DEC, &va_contexts_[va_ctx_idx].num_dec_engines)) { + ERR("Failed to get the number of video decode engines."); + } + amdgpu_device_deinitialize(dev_handle); + + // 
Prob VA profiles + va_contexts_[va_ctx_idx].num_va_profiles = vaMaxNumProfiles(va_contexts_[va_ctx_idx].va_display); + va_contexts_[va_ctx_idx].va_profile_list.resize(va_contexts_[va_ctx_idx].num_va_profiles); + CHECK_VAAPI(vaQueryConfigProfiles(va_contexts_[va_ctx_idx].va_display, va_contexts_[va_ctx_idx].va_profile_list.data(), &va_contexts_[va_ctx_idx].num_va_profiles)); + + *va_ctx_id = va_ctx_idx; + return ROCDEC_SUCCESS; + } +} + +rocDecStatus GpuVaContext::GetVaDisplay(uint32_t va_ctx_id, VADisplay *va_display) { + if (va_ctx_id >= va_contexts_.size()) { + ERR("Invalid VA context Id."); + *va_display = 0; + return ROCDEC_INVALID_PARAMETER; + } else { + *va_display = va_contexts_[va_ctx_id].va_display; + return ROCDEC_SUCCESS; + } +} + +rocDecStatus GpuVaContext::CheckDecCapForCodecType(RocdecDecodeCaps *dec_cap) { + if (dec_cap == nullptr) { + ERR("Null decode capability struct pointer."); + return ROCDEC_INVALID_PARAMETER; + } + rocDecStatus rocdec_status = ROCDEC_SUCCESS; + uint32_t va_ctx_id; + rocdec_status = GetVaContext(dec_cap->device_id, &va_ctx_id); + if (rocdec_status != ROCDEC_SUCCESS) { + ERR("Failed to initilize."); + return rocdec_status; + } + + std::lock_guard lock(mutex); + dec_cap->is_supported = 1; // init value + VAProfile va_profile = VAProfileNone; + switch (dec_cap->codec_type) { + case rocDecVideoCodec_HEVC: { + if (dec_cap->bit_depth_minus_8 == 0) { + va_profile = VAProfileHEVCMain; + } else if (dec_cap->bit_depth_minus_8 == 2) { + va_profile = VAProfileHEVCMain10; + } + break; + } + case rocDecVideoCodec_AVC: { + va_profile = VAProfileH264Main; + break; + } + case rocDecVideoCodec_VP9: { + if (dec_cap->bit_depth_minus_8 == 0) { + va_profile = VAProfileVP9Profile0; + } else if (dec_cap->bit_depth_minus_8 == 2) { + va_profile = VAProfileVP9Profile2; + } + break; + } + case rocDecVideoCodec_AV1: { + #if VA_CHECK_VERSION(1,6,0) + va_profile = VAProfileAV1Profile0; + #else + va_profile = static_cast(32); // VAProfileAV1Profile0; + #endif 
+ break; + } + default: { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + } + + int i; + for (i = 0; i < va_contexts_[va_ctx_id].num_va_profiles; i++) { + if (va_contexts_[va_ctx_id].va_profile_list[i] == va_profile) { + break; + } + } + if (i == va_contexts_[va_ctx_id].num_va_profiles) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + + // Check if the config attributes of the profile have been probed before + if (va_profile != va_contexts_[va_ctx_id].va_profile || va_contexts_[va_ctx_id].config_attributes_probed == false) { + va_contexts_[va_ctx_id].va_profile = va_profile; + + VAConfigAttrib va_config_attrib; + unsigned int attr_count; + std::vector attr_list; + va_config_attrib.type = VAConfigAttribRTFormat; + CHECK_VAAPI(vaGetConfigAttributes(va_contexts_[va_ctx_id].va_display, va_contexts_[va_ctx_id].va_profile, VAEntrypointVLD, &va_config_attrib, 1)); + va_contexts_[va_ctx_id].rt_format_attrib = va_config_attrib.value; + + CHECK_VAAPI(vaCreateConfig(va_contexts_[va_ctx_id].va_display, va_contexts_[va_ctx_id].va_profile, VAEntrypointVLD, &va_config_attrib, 1, &va_contexts_[va_ctx_id].va_config_id)); + CHECK_VAAPI(vaQuerySurfaceAttributes(va_contexts_[va_ctx_id].va_display, va_contexts_[va_ctx_id].va_config_id, 0, &attr_count)); + attr_list.resize(attr_count); + CHECK_VAAPI(vaQuerySurfaceAttributes(va_contexts_[va_ctx_id].va_display, va_contexts_[va_ctx_id].va_config_id, attr_list.data(), &attr_count)); + va_contexts_[va_ctx_id].output_format_mask = 0; + CHECK_VAAPI(vaDestroyConfig(va_contexts_[va_ctx_id].va_display, va_contexts_[va_ctx_id].va_config_id)); + for (int k = 0; k < attr_count; k++) { + switch (attr_list[k].type) { + case VASurfaceAttribPixelFormat: { + switch (attr_list[k].value.value.i) { + case VA_FOURCC_NV12: + va_contexts_[va_ctx_id].output_format_mask |= 1 << rocDecVideoSurfaceFormat_NV12; + break; + case VA_FOURCC_P016: + va_contexts_[va_ctx_id].output_format_mask |= 1 << rocDecVideoSurfaceFormat_P016; + break; + 
default: + break; + } + } + break; + case VASurfaceAttribMinWidth: + va_contexts_[va_ctx_id].min_width = attr_list[k].value.value.i; + break; + case VASurfaceAttribMinHeight: + va_contexts_[va_ctx_id].min_height = attr_list[k].value.value.i; + break; + case VASurfaceAttribMaxWidth: + va_contexts_[va_ctx_id].max_width = attr_list[k].value.value.i; + break; + case VASurfaceAttribMaxHeight: + va_contexts_[va_ctx_id].max_height = attr_list[k].value.value.i; + break; + default: + break; + } + } + va_contexts_[va_ctx_id].config_attributes_probed = true; + } + + // Check chroma format + switch (dec_cap->chroma_format) { + case rocDecVideoChromaFormat_Monochrome: { + if ((va_contexts_[va_ctx_id].rt_format_attrib & VA_RT_FORMAT_YUV400) == 0) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + break; + } + case rocDecVideoChromaFormat_420: { + if ((va_contexts_[va_ctx_id].rt_format_attrib & (VA_RT_FORMAT_YUV420 | VA_RT_FORMAT_YUV420_10 | VA_RT_FORMAT_YUV420_12)) == 0) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + break; + } + case rocDecVideoChromaFormat_422: { + if ((va_contexts_[va_ctx_id].rt_format_attrib & (VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV422_10 | VA_RT_FORMAT_YUV422_12)) == 0) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + break; + } + case rocDecVideoChromaFormat_444: { + if ((va_contexts_[va_ctx_id].rt_format_attrib & (VA_RT_FORMAT_YUV444 | VA_RT_FORMAT_YUV444_10 | VA_RT_FORMAT_YUV444_12)) == 0) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + break; + } + default: { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + } + // Check bit depth + switch (dec_cap->bit_depth_minus_8) { + case 0: { + if ((va_contexts_[va_ctx_id].rt_format_attrib & (VA_RT_FORMAT_YUV420 | VA_RT_FORMAT_YUV422 | VA_RT_FORMAT_YUV444 | VA_RT_FORMAT_YUV400)) == 0) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + break; + } + case 2: { + if ((va_contexts_[va_ctx_id].rt_format_attrib & (VA_RT_FORMAT_YUV420_10 | 
VA_RT_FORMAT_YUV422_10 | VA_RT_FORMAT_YUV444_10)) == 0) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + break; + } + case 4: { + if ((va_contexts_[va_ctx_id].rt_format_attrib & (VA_RT_FORMAT_YUV420_12 | VA_RT_FORMAT_YUV422_12 | VA_RT_FORMAT_YUV444_12)) == 0) { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + break; + } + default: { + dec_cap->is_supported = 0; + return ROCDEC_SUCCESS; + } + } + + dec_cap->num_decoders = va_contexts_[va_ctx_id].num_dec_engines; + dec_cap->output_format_mask = va_contexts_[va_ctx_id].output_format_mask; + dec_cap->max_width = va_contexts_[va_ctx_id].max_width; + dec_cap->max_height = va_contexts_[va_ctx_id].max_height; + dec_cap->min_width = va_contexts_[va_ctx_id].min_width; + dec_cap->min_height = va_contexts_[va_ctx_id].min_height; + return ROCDEC_SUCCESS; +} + +rocDecStatus GpuVaContext::InitHIP(int va_ctx_idx) { + CHECK_HIP(hipGetDeviceCount(&va_contexts_[va_ctx_idx].num_devices)); + if (va_contexts_[va_ctx_idx].num_devices < 1) { + ERR("Didn't find any GPU."); + return ROCDEC_DEVICE_INVALID; + } + if (va_contexts_[va_ctx_idx].device_id >= va_contexts_[va_ctx_idx].num_devices) { + ERR("ERROR: the requested device_id is not found! "); + return ROCDEC_DEVICE_INVALID; + } + CHECK_HIP(hipSetDevice(va_contexts_[va_ctx_idx].device_id)); + CHECK_HIP(hipGetDeviceProperties(&va_contexts_[va_ctx_idx].hip_dev_prop, va_contexts_[va_ctx_idx].device_id)); + return ROCDEC_SUCCESS; +} + +rocDecStatus GpuVaContext::InitVAAPI(int va_ctx_idx, std::string drm_node) { + va_contexts_[va_ctx_idx].drm_fd = open(drm_node.c_str(), O_RDWR); + if (va_contexts_[va_ctx_idx].drm_fd < 0) { + ERR("Failed to open drm node." 
+ drm_node); + return ROCDEC_NOT_INITIALIZED; + } + va_contexts_[va_ctx_idx].va_display = vaGetDisplayDRM(va_contexts_[va_ctx_idx].drm_fd); + if (!va_contexts_[va_ctx_idx].va_display) { + ERR("Failed to create VA display."); + return ROCDEC_NOT_INITIALIZED; + } + vaSetInfoCallback(va_contexts_[va_ctx_idx].va_display, NULL, NULL); + int major_version = 0, minor_version = 0; + CHECK_VAAPI(vaInitialize(va_contexts_[va_ctx_idx].va_display, &major_version, &minor_version)); + return ROCDEC_SUCCESS; +} + +void GpuVaContext::GetVisibleDevices(std::vector& visible_devices_vetor) { + char *visible_devices = std::getenv("HIP_VISIBLE_DEVICES"); + if (visible_devices != nullptr) { + char *token = std::strtok(visible_devices,","); + while (token != nullptr) { + visible_devices_vetor.push_back(std::atoi(token)); + token = std::strtok(nullptr,","); + } + std::sort(visible_devices_vetor.begin(), visible_devices_vetor.end()); + } +} + +void GpuVaContext::GetCurrentComputePartition(std::vector &current_compute_partitions) { + std::string search_path = "/sys/devices/"; + std::string partition_file = "current_compute_partition"; + std::error_code ec; + if (fs::exists(search_path)) { + for (auto it = fs::recursive_directory_iterator(search_path, fs::directory_options::skip_permission_denied); it != fs::recursive_directory_iterator(); ) { + try { + if (it->path().filename() == partition_file) { + std::ifstream file(it->path()); + if (file.is_open()) { + std::string partition; + std::getline(file, partition); + if (partition.compare("SPX") == 0 || partition.compare("spx") == 0) { + current_compute_partitions.push_back(kSpx); + } else if (partition.compare("DPX") == 0 || partition.compare("dpx") == 0) { + current_compute_partitions.push_back(kDpx); + } else if (partition.compare("TPX") == 0 || partition.compare("tpx") == 0) { + current_compute_partitions.push_back(kTpx); + } else if (partition.compare("QPX") == 0 || partition.compare("qpx") == 0) { + current_compute_partitions.push_back(kQpx); 
+ } else if (partition.compare("CPX") == 0 || partition.compare("cpx") == 0) { + current_compute_partitions.push_back(kCpx); + } + file.close(); + } + } + ++it; + } catch (fs::filesystem_error& e) { + it.increment(ec); + } + } + } +} + +void GpuVaContext::GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector& visible_devices, std::vector &current_compute_partitions, int &offset) { + if (!current_compute_partitions.empty()) { + switch (current_compute_partitions[0]) { + case kSpx: + if (device_id < visible_devices.size()) { + offset = visible_devices[device_id] * 7; + } else { + offset = device_id * 7; + } + break; + case kDpx: + if (device_id < visible_devices.size()) { + offset = (visible_devices[device_id] / 2) * 6; + } else { + offset = (device_id / 2) * 6; + } + break; + case kTpx: + // Please note that although there are only 6 XCCs per socket on MI300A, + // there are two dummy render nodes added by the driver. + // This needs to be taken into account when creating drm_node on each socket in TPX mode. + if (device_id < visible_devices.size()) { + offset = (visible_devices[device_id] / 3) * 5; + } else { + offset = (device_id / 3) * 5; + } + break; + case kQpx: + if (device_id < visible_devices.size()) { + offset = (visible_devices[device_id] / 4) * 4; + } else { + offset = (device_id / 4) * 4; + } + break; + case kCpx: + // Please note that both MI300A and MI300X have the same gfx_arch_name which is + // gfx942. Therefore we cannot use the gfx942 to identify MI300A. + // instead use the device name and look for MI300A + // Also, as explained aboe in the TPX mode section, we need to be taken into account + // the extra two dummy nodes when creating drm_node on each socket in CPX mode as well. 
+ std::string mi300a = "MI300A"; + size_t found_mi300a = device_name.find(mi300a); + if (found_mi300a != std::string::npos) { + if (device_id < visible_devices.size()) { + offset = (visible_devices[device_id] / 6) * 2; + } else { + offset = (device_id / 6) * 2; + } + } + break; + } + } } \ No newline at end of file diff --git a/src/rocdecode/vaapi/vaapi_videodecoder.h b/src/rocdecode/vaapi/vaapi_videodecoder.h index ec6399c7..5a53d03c 100644 --- a/src/rocdecode/vaapi/vaapi_videodecoder.h +++ b/src/rocdecode/vaapi/vaapi_videodecoder.h @@ -45,10 +45,53 @@ THE SOFTWARE. #include #include "../../commons.h" #include "../../../api/rocdecode.h" -#include "rocdecode_va_context.h" + +#define CHECK_HIP(call) {\ + hipError_t hip_status = call;\ + if (hip_status != hipSuccess) {\ + std::cout << "HIP failure: " << #call << " failed with 'status: " << hipGetErrorName(hip_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\ + return ROCDEC_RUNTIME_ERROR;\ + }\ +} + +#define CHECK_VAAPI(call) {\ + VAStatus va_status = call;\ + if (va_status != VA_STATUS_SUCCESS) {\ + std::cout << "VAAPI failure: " << #call << " failed with status: " << std::hex << "0x" << va_status << std::dec << " = '" << vaErrorStr(va_status) << "' at " << __FILE__ << ":" << __LINE__ << std::endl;\ + return ROCDEC_RUNTIME_ERROR;\ + }\ +} #define INIT_SLICE_PARAM_LIST_NUM 16 // initial slice parameter buffer list size +typedef enum { + kSpx = 0, // Single Partition Accelerator + kDpx = 1, // Dual Partition Accelerator + kTpx = 2, // Triple Partition Accelerator + kQpx = 3, // Quad Partition Accelerator + kCpx = 4, // Core Partition Accelerator +} ComputePartition; + +typedef struct { + int num_devices; + int device_id; + int drm_fd; + VADisplay va_display; + hipDeviceProp_t hip_dev_prop; + uint32_t num_dec_engines; + int num_va_profiles; + std::vector va_profile_list; // supported profiles by the current GPU + VAProfile va_profile; // current profile used + VAConfigID va_config_id; + bool 
config_attributes_probed; + uint32_t rt_format_attrib; + uint32_t output_format_mask; + uint32_t max_width; + uint32_t max_height; + uint32_t min_width; + uint32_t min_height; +} VaContextInfo; + class VaapiVideoDecoder { public: VaapiVideoDecoder(RocDecoderCreateInfo &decoder_create_info); @@ -83,4 +126,33 @@ class VaapiVideoDecoder { rocDecStatus CreateSurfaces(); rocDecStatus CreateContext(); rocDecStatus DestroyDataBuffers(); +}; + +// The GpuVaContext singleton class providing access to the the GPU VA services +class GpuVaContext { +public: + std::vector va_contexts_; + + static GpuVaContext& GetInstance() { + static GpuVaContext instance; + return instance; + } + + rocDecStatus GetVaContext(int device_id, uint32_t *va_ctx_id); + rocDecStatus GetVaDisplay(uint32_t va_ctx_id, VADisplay *va_display); + rocDecStatus CheckDecCapForCodecType(RocdecDecodeCaps *dec_cap); + +private: + std::mutex mutex; + + GpuVaContext() {}; + GpuVaContext(const GpuVaContext&) = delete; + GpuVaContext& operator = (const GpuVaContext) = delete; + ~GpuVaContext(); + + rocDecStatus InitHIP(int va_ctx_idx); + rocDecStatus InitVAAPI(int va_ctx_idx, std::string drm_node); + void GetVisibleDevices(std::vector& visible_devices_vetor); + void GetCurrentComputePartition(std::vector &current_compute_partitions); + void GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector& visible_devices, std::vector &current_compute_partitions, int &offset); }; \ No newline at end of file From a95054fdb8b663b983ab35726264e17118f341be Mon Sep 17 00:00:00 2001 From: Jeff Jiang Date: Tue, 7 Jan 2025 21:02:38 -0500 Subject: [PATCH 08/10] * rocDecode/HW cap: Added changes based on review comments. 
--- src/rocdecode/vaapi/vaapi_videodecoder.cpp | 29 ++++------------------ src/rocdecode/vaapi/vaapi_videodecoder.h | 4 +-- 2 files changed, 7 insertions(+), 26 deletions(-) diff --git a/src/rocdecode/vaapi/vaapi_videodecoder.cpp b/src/rocdecode/vaapi/vaapi_videodecoder.cpp index ce81acb4..a823dee1 100644 --- a/src/rocdecode/vaapi/vaapi_videodecoder.cpp +++ b/src/rocdecode/vaapi/vaapi_videodecoder.cpp @@ -521,10 +521,9 @@ rocDecStatus GpuVaContext::GetVaContext(int device_id, uint32_t *va_ctx_id) { std::lock_guard lock(mutex); bool found_existing = false; uint32_t va_ctx_idx = 0; - int num_devices; hipDeviceProp_t hip_dev_prop; rocDecStatus rocdec_status = ROCDEC_SUCCESS; - rocdec_status = InitHIP(device_id, num_devices, hip_dev_prop); + rocdec_status = InitHIP(device_id, hip_dev_prop); if (rocdec_status != ROCDEC_SUCCESS) { ERR("Failed to initilize the HIP."); return rocdec_status; @@ -546,7 +545,6 @@ rocDecStatus GpuVaContext::GetVaContext(int device_id, uint32_t *va_ctx_id) { va_contexts_.resize(va_contexts_.size() + 1); va_ctx_idx = va_contexts_.size() - 1; - va_contexts_[va_ctx_idx].num_devices = num_devices; va_contexts_[va_ctx_idx].device_id = device_id; va_contexts_[va_ctx_idx].gpu_uuid.assign(gpu_uuid); va_contexts_[va_ctx_idx].hip_dev_prop = hip_dev_prop; @@ -799,14 +797,13 @@ rocDecStatus GpuVaContext::CheckDecCapForCodecType(RocdecDecodeCaps *dec_cap) { return ROCDEC_SUCCESS; } -#if 1 -rocDecStatus GpuVaContext::InitHIP(int device_id, int& num_devices, hipDeviceProp_t& hip_dev_prop) { - CHECK_HIP(hipGetDeviceCount(&num_devices)); - if (num_devices < 1) { +rocDecStatus GpuVaContext::InitHIP(int device_id, hipDeviceProp_t& hip_dev_prop) { + CHECK_HIP(hipGetDeviceCount(&num_devices_)); + if (num_devices_ < 1) { ERR("Didn't find any GPU."); return ROCDEC_DEVICE_INVALID; } - if (device_id >= num_devices) { + if (device_id >= num_devices_) { ERR("ERROR: the requested device_id is not found! 
"); return ROCDEC_DEVICE_INVALID; } @@ -814,22 +811,6 @@ rocDecStatus GpuVaContext::InitHIP(int device_id, int& num_devices, hipDevicePro CHECK_HIP(hipGetDeviceProperties(&hip_dev_prop, device_id)); return ROCDEC_SUCCESS; } -#else -rocDecStatus GpuVaContext::InitHIP(int va_ctx_idx) { - CHECK_HIP(hipGetDeviceCount(&va_contexts_[va_ctx_idx].num_devices)); - if (va_contexts_[va_ctx_idx].num_devices < 1) { - ERR("Didn't find any GPU."); - return ROCDEC_DEVICE_INVALID; - } - if (va_contexts_[va_ctx_idx].device_id >= va_contexts_[va_ctx_idx].num_devices) { - ERR("ERROR: the requested device_id is not found! "); - return ROCDEC_DEVICE_INVALID; - } - CHECK_HIP(hipSetDevice(va_contexts_[va_ctx_idx].device_id)); - CHECK_HIP(hipGetDeviceProperties(&va_contexts_[va_ctx_idx].hip_dev_prop, va_contexts_[va_ctx_idx].device_id)); - return ROCDEC_SUCCESS; -} -#endif rocDecStatus GpuVaContext::InitVAAPI(int va_ctx_idx, std::string drm_node) { va_contexts_[va_ctx_idx].drm_fd = open(drm_node.c_str(), O_RDWR); diff --git a/src/rocdecode/vaapi/vaapi_videodecoder.h b/src/rocdecode/vaapi/vaapi_videodecoder.h index 32d6a67c..141ea18d 100644 --- a/src/rocdecode/vaapi/vaapi_videodecoder.h +++ b/src/rocdecode/vaapi/vaapi_videodecoder.h @@ -74,7 +74,6 @@ typedef enum { } ComputePartition; typedef struct { - int num_devices; int device_id; std::string gpu_uuid; int drm_fd; @@ -133,6 +132,7 @@ class VaapiVideoDecoder { // The GpuVaContext singleton class providing access to the the GPU VA services class GpuVaContext { public: + int num_devices_; std::vector va_contexts_; static GpuVaContext& GetInstance() { @@ -159,7 +159,7 @@ class GpuVaContext { GpuVaContext& operator = (const GpuVaContext) = delete; ~GpuVaContext(); - rocDecStatus InitHIP(int device_id, int& num_devices, hipDeviceProp_t& hip_dev_prop); + rocDecStatus InitHIP(int device_id, hipDeviceProp_t& hip_dev_prop); rocDecStatus InitVAAPI(int va_ctx_idx, std::string drm_node); void GetVisibleDevices(std::vector& visible_devices_vetor); 
void GetCurrentComputePartition(std::vector ¤t_compute_partitions); From cd8d52729fc2ef0030ee8401660b8d795338651f Mon Sep 17 00:00:00 2001 From: Jeff Jiang Date: Wed, 8 Jan 2025 10:34:03 -0500 Subject: [PATCH 09/10] * rocDecode/HW cap: Added changes based on review comments. --- src/rocdecode/vaapi/vaapi_videodecoder.cpp | 306 ++++++++++----------- utils/rocvideodecode/roc_video_dec.cpp | 2 +- 2 files changed, 154 insertions(+), 154 deletions(-) diff --git a/src/rocdecode/vaapi/vaapi_videodecoder.cpp b/src/rocdecode/vaapi/vaapi_videodecoder.cpp index a823dee1..d87bb238 100644 --- a/src/rocdecode/vaapi/vaapi_videodecoder.cpp +++ b/src/rocdecode/vaapi/vaapi_videodecoder.cpp @@ -55,19 +55,6 @@ VaapiVideoDecoder::~VaapiVideoDecoder() { } } -bool VaapiVideoDecoder::IsCodecConfigSupported(int device_id, rocDecVideoCodec codec_type, rocDecVideoChromaFormat chroma_format, uint32_t bit_depth_minus8, rocDecVideoSurfaceFormat output_format) { - RocdecDecodeCaps decode_caps; - decode_caps.device_id = device_id; - decode_caps.codec_type = codec_type; - decode_caps.chroma_format = chroma_format; - decode_caps.bit_depth_minus_8 = bit_depth_minus8; - if((rocDecGetDecoderCaps(&decode_caps) != ROCDEC_SUCCESS) || (decode_caps.is_supported == false) || ((decode_caps.output_format_mask & (1 << output_format)) == 0)) { - return false; - } else { - return true; - } -} - rocDecStatus VaapiVideoDecoder::InitializeDecoder() { rocDecStatus rocdec_status = ROCDEC_SUCCESS; @@ -106,140 +93,6 @@ rocDecStatus VaapiVideoDecoder::InitializeDecoder() { return rocdec_status; } -rocDecStatus VaapiVideoDecoder::CreateDecoderConfig() { - switch (decoder_create_info_.codec_type) { - case rocDecVideoCodec_HEVC: - if (decoder_create_info_.bit_depth_minus_8 == 0) { - va_profile_ = VAProfileHEVCMain; - } else if (decoder_create_info_.bit_depth_minus_8 == 2) { - va_profile_ = VAProfileHEVCMain10; - } - break; - case rocDecVideoCodec_AVC: - va_profile_ = VAProfileH264Main; - break; - case rocDecVideoCodec_VP9: 
- if (decoder_create_info_.bit_depth_minus_8 == 0) { - va_profile_ = VAProfileVP9Profile0; - } else if (decoder_create_info_.bit_depth_minus_8 == 2) { - va_profile_ = VAProfileVP9Profile2; - } - break; - case rocDecVideoCodec_AV1: -#if VA_CHECK_VERSION(1,6,0) - va_profile_ = VAProfileAV1Profile0; -#else - va_profile_ = static_cast(32); // VAProfileAV1Profile0; -#endif - break; - default: - ERR("The codec type is not supported."); - return ROCDEC_NOT_SUPPORTED; - } - va_config_attrib_.type = VAConfigAttribRTFormat; - CHECK_VAAPI(vaGetConfigAttributes(va_display_, va_profile_, VAEntrypointVLD, &va_config_attrib_, 1)); - CHECK_VAAPI(vaCreateConfig(va_display_, va_profile_, VAEntrypointVLD, &va_config_attrib_, 1, &va_config_id_)); - unsigned int num_attribs = 0; - CHECK_VAAPI(vaQuerySurfaceAttributes(va_display_, va_config_id_, nullptr, &num_attribs)); - std::vector attribs(num_attribs); - CHECK_VAAPI(vaQuerySurfaceAttributes(va_display_, va_config_id_, attribs.data(), &num_attribs)); - for (auto attrib : attribs) { - if (attrib.type == VASurfaceAttribDRMFormatModifiers) { - supports_modifiers_ = true; - break; - } - } - return ROCDEC_SUCCESS; -} - -rocDecStatus VaapiVideoDecoder::CreateSurfaces() { - if (decoder_create_info_.num_decode_surfaces < 1) { - ERR("Invalid number of decode surfaces."); - return ROCDEC_INVALID_PARAMETER; - } - va_surface_ids_.resize(decoder_create_info_.num_decode_surfaces); - std::vector surf_attribs; - VASurfaceAttrib surf_attrib; - surf_attrib.type = VASurfaceAttribPixelFormat; - surf_attrib.flags = VA_SURFACE_ATTRIB_SETTABLE; - surf_attrib.value.type = VAGenericValueTypeInteger; - uint32_t surface_format; - switch (decoder_create_info_.chroma_format) { - case rocDecVideoChromaFormat_Monochrome: - surface_format = VA_RT_FORMAT_YUV400; - surf_attrib.value.value.i = VA_FOURCC_Y800; - break; - case rocDecVideoChromaFormat_420: - if (decoder_create_info_.bit_depth_minus_8 == 2) { - surface_format = VA_RT_FORMAT_YUV420_10; - 
surf_attrib.value.value.i = VA_FOURCC_P010; - } else if (decoder_create_info_.bit_depth_minus_8 == 4) { - surface_format = VA_RT_FORMAT_YUV420_12; -#if VA_CHECK_VERSION(1,8,0) - surf_attrib.value.value.i = VA_FOURCC_P012; -#else - surf_attrib.value.value.i = 0x32313050; // VA_FOURCC_P012 -#endif - } else { - surface_format = VA_RT_FORMAT_YUV420; - surf_attrib.value.value.i = VA_FOURCC_NV12; - } - break; - case rocDecVideoChromaFormat_422: - surface_format = VA_RT_FORMAT_YUV422; - break; - case rocDecVideoChromaFormat_444: - surface_format = VA_RT_FORMAT_YUV444; - break; - default: - ERR("The surface type is not supported"); - return ROCDEC_NOT_SUPPORTED; - } - surf_attribs.push_back(surf_attrib); - uint64_t mod_linear = 0; - VADRMFormatModifierList modifier_list = { - .num_modifiers = 1, - .modifiers = &mod_linear, - }; - if (supports_modifiers_) { - surf_attrib.type = VASurfaceAttribDRMFormatModifiers; - surf_attrib.value.type = VAGenericValueTypePointer; - surf_attrib.value.value.p = &modifier_list; - surf_attribs.push_back(surf_attrib); - } - CHECK_VAAPI(vaCreateSurfaces(va_display_, surface_format, decoder_create_info_.width, - decoder_create_info_.height, va_surface_ids_.data(), va_surface_ids_.size(), surf_attribs.data(), surf_attribs.size())); - return ROCDEC_SUCCESS; -} - -rocDecStatus VaapiVideoDecoder::CreateContext() { - CHECK_VAAPI(vaCreateContext(va_display_, va_config_id_, decoder_create_info_.width, decoder_create_info_.height, - VA_PROGRESSIVE, va_surface_ids_.data(), va_surface_ids_.size(), &va_context_id_)); - return ROCDEC_SUCCESS; -} - -rocDecStatus VaapiVideoDecoder::DestroyDataBuffers() { - if (pic_params_buf_id_) { - CHECK_VAAPI(vaDestroyBuffer(va_display_, pic_params_buf_id_)); - pic_params_buf_id_ = 0; - } - if (iq_matrix_buf_id_) { - CHECK_VAAPI(vaDestroyBuffer(va_display_, iq_matrix_buf_id_)); - iq_matrix_buf_id_ = 0; - } - for (int i = 0; i < num_slices_; i++) { - if (slice_params_buf_id_[i]) { - CHECK_VAAPI(vaDestroyBuffer(va_display_, 
slice_params_buf_id_[i])); - slice_params_buf_id_[i] = 0; - } - } - if (slice_data_buf_id_) { - CHECK_VAAPI(vaDestroyBuffer(va_display_, slice_data_buf_id_)); - slice_data_buf_id_ = 0; - } - return ROCDEC_SUCCESS; -} - rocDecStatus VaapiVideoDecoder::SubmitDecode(RocdecPicParams *pPicParams) { void *pic_params_ptr, *iq_matrix_ptr, *slice_params_ptr; uint32_t pic_params_size, iq_matrix_size, slice_params_size; @@ -460,6 +313,18 @@ rocDecStatus VaapiVideoDecoder::ExportSurface(int pic_idx, VADRMPRIMESurfaceDesc return ROCDEC_SUCCESS; } +rocDecStatus VaapiVideoDecoder::SyncSurface(int pic_idx) { + if (pic_idx >= va_surface_ids_.size()) { + return ROCDEC_INVALID_PARAMETER; + } + VASurfaceStatus surface_status; + CHECK_VAAPI(vaQuerySurfaceStatus(va_display_, va_surface_ids_[pic_idx], &surface_status)); + if (surface_status != VASurfaceReady) { + CHECK_VAAPI(vaSyncSurface(va_display_, va_surface_ids_[pic_idx])); + } + return ROCDEC_SUCCESS; +} + rocDecStatus VaapiVideoDecoder::ReconfigureDecoder(RocdecReconfigureDecoderInfo *reconfig_params) { if (reconfig_params == nullptr) { return ROCDEC_INVALID_PARAMETER; @@ -491,14 +356,149 @@ rocDecStatus VaapiVideoDecoder::ReconfigureDecoder(RocdecReconfigureDecoderInfo return rocdec_status; } -rocDecStatus VaapiVideoDecoder::SyncSurface(int pic_idx) { - if (pic_idx >= va_surface_ids_.size()) { +bool VaapiVideoDecoder::IsCodecConfigSupported(int device_id, rocDecVideoCodec codec_type, rocDecVideoChromaFormat chroma_format, uint32_t bit_depth_minus8, rocDecVideoSurfaceFormat output_format) { + RocdecDecodeCaps decode_caps; + decode_caps.device_id = device_id; + decode_caps.codec_type = codec_type; + decode_caps.chroma_format = chroma_format; + decode_caps.bit_depth_minus_8 = bit_depth_minus8; + if((rocDecGetDecoderCaps(&decode_caps) != ROCDEC_SUCCESS) || (decode_caps.is_supported == false) || ((decode_caps.output_format_mask & (1 << output_format)) == 0)) { + return false; + } else { + return true; + } +} + +rocDecStatus 
VaapiVideoDecoder::CreateDecoderConfig() { + switch (decoder_create_info_.codec_type) { + case rocDecVideoCodec_HEVC: + if (decoder_create_info_.bit_depth_minus_8 == 0) { + va_profile_ = VAProfileHEVCMain; + } else if (decoder_create_info_.bit_depth_minus_8 == 2) { + va_profile_ = VAProfileHEVCMain10; + } + break; + case rocDecVideoCodec_AVC: + va_profile_ = VAProfileH264Main; + break; + case rocDecVideoCodec_VP9: + if (decoder_create_info_.bit_depth_minus_8 == 0) { + va_profile_ = VAProfileVP9Profile0; + } else if (decoder_create_info_.bit_depth_minus_8 == 2) { + va_profile_ = VAProfileVP9Profile2; + } + break; + case rocDecVideoCodec_AV1: +#if VA_CHECK_VERSION(1,6,0) + va_profile_ = VAProfileAV1Profile0; +#else + va_profile_ = static_cast(32); // VAProfileAV1Profile0; +#endif + break; + default: + ERR("The codec type is not supported."); + return ROCDEC_NOT_SUPPORTED; + } + va_config_attrib_.type = VAConfigAttribRTFormat; + CHECK_VAAPI(vaGetConfigAttributes(va_display_, va_profile_, VAEntrypointVLD, &va_config_attrib_, 1)); + CHECK_VAAPI(vaCreateConfig(va_display_, va_profile_, VAEntrypointVLD, &va_config_attrib_, 1, &va_config_id_)); + unsigned int num_attribs = 0; + CHECK_VAAPI(vaQuerySurfaceAttributes(va_display_, va_config_id_, nullptr, &num_attribs)); + std::vector attribs(num_attribs); + CHECK_VAAPI(vaQuerySurfaceAttributes(va_display_, va_config_id_, attribs.data(), &num_attribs)); + for (auto attrib : attribs) { + if (attrib.type == VASurfaceAttribDRMFormatModifiers) { + supports_modifiers_ = true; + break; + } + } + return ROCDEC_SUCCESS; +} + +rocDecStatus VaapiVideoDecoder::CreateSurfaces() { + if (decoder_create_info_.num_decode_surfaces < 1) { + ERR("Invalid number of decode surfaces."); return ROCDEC_INVALID_PARAMETER; } - VASurfaceStatus surface_status; - CHECK_VAAPI(vaQuerySurfaceStatus(va_display_, va_surface_ids_[pic_idx], &surface_status)); - if (surface_status != VASurfaceReady) { - CHECK_VAAPI(vaSyncSurface(va_display_, 
va_surface_ids_[pic_idx])); + va_surface_ids_.resize(decoder_create_info_.num_decode_surfaces); + std::vector surf_attribs; + VASurfaceAttrib surf_attrib; + surf_attrib.type = VASurfaceAttribPixelFormat; + surf_attrib.flags = VA_SURFACE_ATTRIB_SETTABLE; + surf_attrib.value.type = VAGenericValueTypeInteger; + uint32_t surface_format; + switch (decoder_create_info_.chroma_format) { + case rocDecVideoChromaFormat_Monochrome: + surface_format = VA_RT_FORMAT_YUV400; + surf_attrib.value.value.i = VA_FOURCC_Y800; + break; + case rocDecVideoChromaFormat_420: + if (decoder_create_info_.bit_depth_minus_8 == 2) { + surface_format = VA_RT_FORMAT_YUV420_10; + surf_attrib.value.value.i = VA_FOURCC_P010; + } else if (decoder_create_info_.bit_depth_minus_8 == 4) { + surface_format = VA_RT_FORMAT_YUV420_12; +#if VA_CHECK_VERSION(1,8,0) + surf_attrib.value.value.i = VA_FOURCC_P012; +#else + surf_attrib.value.value.i = 0x32313050; // VA_FOURCC_P012 +#endif + } else { + surface_format = VA_RT_FORMAT_YUV420; + surf_attrib.value.value.i = VA_FOURCC_NV12; + } + break; + case rocDecVideoChromaFormat_422: + surface_format = VA_RT_FORMAT_YUV422; + break; + case rocDecVideoChromaFormat_444: + surface_format = VA_RT_FORMAT_YUV444; + break; + default: + ERR("The surface type is not supported"); + return ROCDEC_NOT_SUPPORTED; + } + surf_attribs.push_back(surf_attrib); + uint64_t mod_linear = 0; + VADRMFormatModifierList modifier_list = { + .num_modifiers = 1, + .modifiers = &mod_linear, + }; + if (supports_modifiers_) { + surf_attrib.type = VASurfaceAttribDRMFormatModifiers; + surf_attrib.value.type = VAGenericValueTypePointer; + surf_attrib.value.value.p = &modifier_list; + surf_attribs.push_back(surf_attrib); + } + CHECK_VAAPI(vaCreateSurfaces(va_display_, surface_format, decoder_create_info_.width, + decoder_create_info_.height, va_surface_ids_.data(), va_surface_ids_.size(), surf_attribs.data(), surf_attribs.size())); + return ROCDEC_SUCCESS; +} + +rocDecStatus 
VaapiVideoDecoder::CreateContext() { + CHECK_VAAPI(vaCreateContext(va_display_, va_config_id_, decoder_create_info_.width, decoder_create_info_.height, + VA_PROGRESSIVE, va_surface_ids_.data(), va_surface_ids_.size(), &va_context_id_)); + return ROCDEC_SUCCESS; +} + +rocDecStatus VaapiVideoDecoder::DestroyDataBuffers() { + if (pic_params_buf_id_) { + CHECK_VAAPI(vaDestroyBuffer(va_display_, pic_params_buf_id_)); + pic_params_buf_id_ = 0; + } + if (iq_matrix_buf_id_) { + CHECK_VAAPI(vaDestroyBuffer(va_display_, iq_matrix_buf_id_)); + iq_matrix_buf_id_ = 0; + } + for (int i = 0; i < num_slices_; i++) { + if (slice_params_buf_id_[i]) { + CHECK_VAAPI(vaDestroyBuffer(va_display_, slice_params_buf_id_[i])); + slice_params_buf_id_[i] = 0; + } + } + if (slice_data_buf_id_) { + CHECK_VAAPI(vaDestroyBuffer(va_display_, slice_data_buf_id_)); + slice_data_buf_id_ = 0; } return ROCDEC_SUCCESS; } diff --git a/utils/rocvideodecode/roc_video_dec.cpp b/utils/rocvideodecode/roc_video_dec.cpp index 6aa398e0..6ea6fdd9 100644 --- a/utils/rocvideodecode/roc_video_dec.cpp +++ b/utils/rocvideodecode/roc_video_dec.cpp @@ -1058,7 +1058,7 @@ bool RocVideoDecoder::CodecSupported(int device_id, rocDecVideoCodec codec_id, u decode_caps.codec_type = codec_id; decode_caps.chroma_format = rocDecVideoChromaFormat_420; decode_caps.bit_depth_minus_8 = bit_depth - 8; - if((rocDecGetDecoderCaps(&decode_caps) != ROCDEC_SUCCESS) || (decode_caps.is_supported == false)) { + if((rocDecGetDecoderCaps(&decode_caps) != ROCDEC_SUCCESS) || !decode_caps.is_supported) { return false; } else { return true; From cc6c46d9eb3287f84e61539b7e03d07b0e3bf89b Mon Sep 17 00:00:00 2001 From: Jeff Jiang Date: Thu, 9 Jan 2025 17:02:44 -0500 Subject: [PATCH 10/10] * rocDecode/HW cap: GPU VA context class name change based on review comment. 
--- src/rocdecode/rocdecode_api.cpp | 2 +- src/rocdecode/vaapi/vaapi_videodecoder.cpp | 24 +++++++++++----------- src/rocdecode/vaapi/vaapi_videodecoder.h | 16 +++++++-------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/rocdecode/rocdecode_api.cpp b/src/rocdecode/rocdecode_api.cpp index 39329340..09e814b4 100644 --- a/src/rocdecode/rocdecode_api.cpp +++ b/src/rocdecode/rocdecode_api.cpp @@ -72,7 +72,7 @@ rocDecGetDecoderCaps(RocdecDecodeCaps *pdc) { if (pdc == nullptr) { return ROCDEC_INVALID_PARAMETER; } - GpuVaContext& va_ctx = GpuVaContext::GetInstance(); + VaContext& va_ctx = VaContext::GetInstance(); rocDecStatus ret = ROCDEC_SUCCESS; if ((ret = va_ctx.CheckDecCapForCodecType(pdc)) != ROCDEC_SUCCESS) { ERR("Failed to obtain decoder capabilities from driver."); diff --git a/src/rocdecode/vaapi/vaapi_videodecoder.cpp b/src/rocdecode/vaapi/vaapi_videodecoder.cpp index f2fa316b..a7ccd6c5 100644 --- a/src/rocdecode/vaapi/vaapi_videodecoder.cpp +++ b/src/rocdecode/vaapi/vaapi_videodecoder.cpp @@ -65,7 +65,7 @@ rocDecStatus VaapiVideoDecoder::InitializeDecoder() { return ROCDEC_NOT_SUPPORTED; } - GpuVaContext& va_ctx = GpuVaContext::GetInstance(); + VaContext& va_ctx = VaContext::GetInstance(); uint32_t va_ctx_id; if ((rocdec_status = va_ctx.GetVaContext(decoder_create_info_.device_id, &va_ctx_id)) != ROCDEC_SUCCESS) { ERR("Failed to get VA context."); @@ -503,11 +503,11 @@ rocDecStatus VaapiVideoDecoder::DestroyDataBuffers() { return ROCDEC_SUCCESS; } -GpuVaContext::GpuVaContext() { +VaContext::VaContext() { GetGpuUuids(); } -GpuVaContext::~GpuVaContext() { +VaContext::~VaContext() { for (int i = 0; i < va_contexts_.size(); i++) { if (va_contexts_[i].va_display) { if (vaTerminate(va_contexts_[i].va_display) != VA_STATUS_SUCCESS) { @@ -517,7 +517,7 @@ GpuVaContext::~GpuVaContext() { } }; -rocDecStatus GpuVaContext::GetVaContext(int device_id, uint32_t *va_ctx_id) { +rocDecStatus VaContext::GetVaContext(int device_id, uint32_t *va_ctx_id) { 
std::lock_guard lock(mutex); bool found_existing = false; uint32_t va_ctx_idx = 0; @@ -600,7 +600,7 @@ rocDecStatus GpuVaContext::GetVaContext(int device_id, uint32_t *va_ctx_id) { } } -rocDecStatus GpuVaContext::GetVaDisplay(uint32_t va_ctx_id, VADisplay *va_display) { +rocDecStatus VaContext::GetVaDisplay(uint32_t va_ctx_id, VADisplay *va_display) { if (va_ctx_id >= va_contexts_.size()) { ERR("Invalid VA context Id."); *va_display = 0; @@ -611,7 +611,7 @@ rocDecStatus GpuVaContext::GetVaDisplay(uint32_t va_ctx_id, VADisplay *va_displa } } -rocDecStatus GpuVaContext::CheckDecCapForCodecType(RocdecDecodeCaps *dec_cap) { +rocDecStatus VaContext::CheckDecCapForCodecType(RocdecDecodeCaps *dec_cap) { if (dec_cap == nullptr) { ERR("Null decode capability struct pointer."); return ROCDEC_INVALID_PARAMETER; @@ -797,7 +797,7 @@ rocDecStatus GpuVaContext::CheckDecCapForCodecType(RocdecDecodeCaps *dec_cap) { return ROCDEC_SUCCESS; } -rocDecStatus GpuVaContext::InitHIP(int device_id, hipDeviceProp_t& hip_dev_prop) { +rocDecStatus VaContext::InitHIP(int device_id, hipDeviceProp_t& hip_dev_prop) { CHECK_HIP(hipGetDeviceCount(&num_devices_)); if (num_devices_ < 1) { ERR("Didn't find any GPU."); @@ -812,7 +812,7 @@ rocDecStatus GpuVaContext::InitHIP(int device_id, hipDeviceProp_t& hip_dev_prop) return ROCDEC_SUCCESS; } -rocDecStatus GpuVaContext::InitVAAPI(int va_ctx_idx, std::string drm_node) { +rocDecStatus VaContext::InitVAAPI(int va_ctx_idx, std::string drm_node) { va_contexts_[va_ctx_idx].drm_fd = open(drm_node.c_str(), O_RDWR); if (va_contexts_[va_ctx_idx].drm_fd < 0) { ERR("Failed to open drm node." 
+ drm_node); @@ -829,7 +829,7 @@ rocDecStatus GpuVaContext::InitVAAPI(int va_ctx_idx, std::string drm_node) { return ROCDEC_SUCCESS; } -void GpuVaContext::GetVisibleDevices(std::vector& visible_devices_vetor) { +void VaContext::GetVisibleDevices(std::vector& visible_devices_vetor) { // First, check if the ROCR_VISIBLE_DEVICES environment variable is present char *visible_devices = std::getenv("ROCR_VISIBLE_DEVICES"); // If ROCR_VISIBLE_DEVICES is not present, check if HIP_VISIBLE_DEVICES is present @@ -846,7 +846,7 @@ void GpuVaContext::GetVisibleDevices(std::vector& visible_devices_vetor) { } } -void GpuVaContext::GetCurrentComputePartition(std::vector ¤t_compute_partitions) { +void VaContext::GetCurrentComputePartition(std::vector ¤t_compute_partitions) { std::string search_path = "/sys/devices/"; std::string partition_file = "current_compute_partition"; std::error_code ec; @@ -880,7 +880,7 @@ void GpuVaContext::GetCurrentComputePartition(std::vector &cur } } -void GpuVaContext::GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector& visible_devices, std::vector ¤t_compute_partitions, int &offset) { +void VaContext::GetDrmNodeOffset(std::string device_name, uint8_t device_id, std::vector& visible_devices, std::vector ¤t_compute_partitions, int &offset) { if (!current_compute_partitions.empty()) { switch (current_compute_partitions[0]) { case kSpx: @@ -939,7 +939,7 @@ void GpuVaContext::GetDrmNodeOffset(std::string device_name, uint8_t device_id, * UUID from the corresponding sysfs path. It maps each unique GPU UUID to its * corresponding render node ID and stores this mapping in the gpu_uuids_to_render_nodes_map_. 
*/ -void GpuVaContext::GetGpuUuids() { +void VaContext::GetGpuUuids() { std::string dri_path = "/dev/dri"; // Iterate through all render nodes for (const auto& entry : fs::directory_iterator(dri_path, fs::directory_options::skip_permission_denied)) { diff --git a/src/rocdecode/vaapi/vaapi_videodecoder.h b/src/rocdecode/vaapi/vaapi_videodecoder.h index 141ea18d..86f5e99f 100644 --- a/src/rocdecode/vaapi/vaapi_videodecoder.h +++ b/src/rocdecode/vaapi/vaapi_videodecoder.h @@ -129,14 +129,14 @@ class VaapiVideoDecoder { rocDecStatus DestroyDataBuffers(); }; -// The GpuVaContext singleton class providing access to the the GPU VA services -class GpuVaContext { +// The VaContext singleton class providing access to the the GPU VA services +class VaContext { public: int num_devices_; std::vector va_contexts_; - static GpuVaContext& GetInstance() { - static GpuVaContext instance; + static VaContext& GetInstance() { + static VaContext instance; return instance; } rocDecStatus GetVaContext(int device_id, uint32_t *va_ctx_id); @@ -154,10 +154,10 @@ class GpuVaContext { */ std::unordered_map gpu_uuids_to_render_nodes_map_; - GpuVaContext(); - GpuVaContext(const GpuVaContext&) = delete; - GpuVaContext& operator = (const GpuVaContext) = delete; - ~GpuVaContext(); + VaContext(); + VaContext(const VaContext&) = delete; + VaContext& operator = (const VaContext) = delete; + ~VaContext(); rocDecStatus InitHIP(int device_id, hipDeviceProp_t& hip_dev_prop); rocDecStatus InitVAAPI(int va_ctx_idx, std::string drm_node);