From 9ab05a609568ac718cfe26bb306c6684dac6d81a Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sat, 11 May 2024 01:48:07 +1000 Subject: [PATCH 1/2] GS/Vulkan: Always issue first barrier on RDNA3 It turns out *not* doing this causes GPU resets on RDNA3, specifically Windows drivers. Despite the layout change enforcing the execution dependency between previous draws and the first input attachment read, it still wants the region/fragment-local barrier... --- pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp | 7 ++++++- pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h | 3 +++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index 6625f35d32947..884258a8d40f0 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -5750,8 +5750,13 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config) // We don't need the very first barrier if this is the first draw after switching to feedback loop, // because the layout change in itself enforces the execution dependency. HDR needs a barrier between // setup and the first draw to read it. TODO: Make HDR use subpasses instead. + + // However, it turns out *not* doing this causes GPU resets on RDNA3, specifically Windows drivers. + // Despite the layout change enforcing the execution dependency between previous draws and the first + // input attachment read, it still wants the region/fragment-local barrier... 
+ const bool skip_first_barrier = - (draw_rt && draw_rt->GetLayout() != GSTextureVK::Layout::FeedbackLoop && !pipe.ps.hdr); + (draw_rt && draw_rt->GetLayout() != GSTextureVK::Layout::FeedbackLoop && !pipe.ps.hdr && !IsDeviceAMD()); OMSetRenderTargets(draw_rt, draw_ds, config.scissor, static_cast(pipe.feedback_loop_flags)); if (pipe.IsRTFeedbackLoop()) diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h index 8bfe12277f61b..0f7871a360ebf 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h @@ -68,6 +68,9 @@ class GSDeviceVK final : public GSDevice /// Returns true if running on an NVIDIA GPU. __fi bool IsDeviceNVIDIA() const { return (m_device_properties.vendorID == 0x10DE); } + /// Returns true if running on an AMD GPU. + __fi bool IsDeviceAMD() const { return (m_device_properties.vendorID == 0x1002); } + // Creates a simple render pass. VkRenderPass GetRenderPass(VkFormat color_format, VkFormat depth_format, VkAttachmentLoadOp color_load_op = VK_ATTACHMENT_LOAD_OP_LOAD, From bbdda4a6f0fa062798e2b5f50a6f5f9abd116b06 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sat, 11 May 2024 01:59:55 +1000 Subject: [PATCH 2/2] GS/HW: Avoid barriers on second alpha pass when only writing to Z Completely redundant. We also don't need to use the drawlist. 
--- pcsx2/GS/Renderers/Common/GSDevice.h | 6 +- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 12 ++- pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp | 100 +++++++++++++---------- pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h | 3 +- 4 files changed, 74 insertions(+), 47 deletions(-) diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index ba18334f68521..f3afed1efae05 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -387,7 +387,7 @@ struct alignas(16) GSHWDrawConfig __fi bool IsFeedbackLoop() const { const u32 sw_blend_bits = blend_a | blend_b | blend_d; - const bool sw_blend_needs_rt = sw_blend_bits != 0 && ((sw_blend_bits | blend_c) & 1u); + const bool sw_blend_needs_rt = (sw_blend_bits != 0 && ((sw_blend_bits | blend_c) & 1u)) || ((a_masked & blend_c) != 0); return tex_is_fb || fbmask || (date > 0 && date != 3) || sw_blend_needs_rt; } @@ -688,7 +688,9 @@ struct alignas(16) GSHWDrawConfig struct AlphaPass { alignas(8) PSSelector ps; - bool enable; + bool enable : 1; + bool require_one_barrier : 1; + bool require_full_barrier : 1; ColorMaskSelector colormask; DepthStencilSelector depth; float ps_aref; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 6f313831bbdea..efaf1058b8e9b 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -5910,11 +5910,10 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta SetupIA(rtscale, sx, sy); - m_conf.alpha_second_pass.enable = ate_second_pass; - if (ate_second_pass) { pxAssert(!env.PABE.PABE); + std::memcpy(&m_conf.alpha_second_pass.ps, &m_conf.ps, sizeof(m_conf.ps)); std::memcpy(&m_conf.alpha_second_pass.colormask, &m_conf.colormask, sizeof(m_conf.colormask)); std::memcpy(&m_conf.alpha_second_pass.depth, &m_conf.depth, sizeof(m_conf.depth)); @@ -5958,6 +5957,8 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, 
GSTextureCache::Ta r = g = b = a = false; } + m_conf.alpha_second_pass.enable = true; + if (z || r || g || b || a) { m_conf.alpha_second_pass.depth.zwe = z; @@ -5966,7 +5967,14 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta m_conf.alpha_second_pass.colormask.wb = b; m_conf.alpha_second_pass.colormask.wa = a; if (m_conf.alpha_second_pass.colormask.wrgba == 0) + { m_conf.alpha_second_pass.ps.DisableColorOutput(); + } + if (m_conf.alpha_second_pass.ps.IsFeedbackLoop()) + { + m_conf.alpha_second_pass.require_one_barrier = m_conf.require_one_barrier; + m_conf.alpha_second_pass.require_full_barrier = m_conf.require_full_barrier; + } } else { diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index 884258a8d40f0..72ec313468f37 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -5839,7 +5839,7 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config) // now we can do the actual draw if (BindDrawPipeline(pipe)) - SendHWDraw(config, draw_rt, skip_first_barrier); + SendHWDraw(config, draw_rt, config.require_one_barrier, config.require_full_barrier, skip_first_barrier); // blend second pass if (config.blend_second_pass.enable) @@ -5851,7 +5851,10 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config) pipe.ps.blend_hw = config.blend_second_pass.blend_hw; pipe.ps.dither = config.blend_second_pass.dither; if (BindDrawPipeline(pipe)) + { + // TODO: This probably should have barriers, in case we want to use it conditionally. 
DrawIndexedPrimitive(); + } } // and the alpha pass @@ -5869,7 +5872,10 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config) pipe.dss = config.alpha_second_pass.depth; pipe.bs = config.blend; if (BindDrawPipeline(pipe)) - SendHWDraw(config, draw_rt, false); + { + SendHWDraw(config, draw_rt, config.alpha_second_pass.require_one_barrier, + config.alpha_second_pass.require_full_barrier, false); + } } if (draw_rt_clone) @@ -5972,49 +5978,57 @@ VkImageMemoryBarrier GSDeviceVK::GetColorBufferBarrier(GSTextureVK* rt) const VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, rt->GetImage(), {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}}; } -void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, bool skip_first_barrier) +void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, + bool one_barrier, bool full_barrier, bool skip_first_barrier) { - if (config.drawlist) + if (!m_features.texture_barrier) [[unlikely]] { - GL_PUSH("Split the draw (SPRITE)"); - g_perfmon.Put( - GSPerfMon::Barriers, static_cast(config.drawlist->size()) - static_cast(skip_first_barrier)); - - const u32 indices_per_prim = config.indices_per_prim; - const u32 draw_list_size = static_cast(config.drawlist->size()); - const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt); - u32 p = 0; - u32 n = 0; - - if (skip_first_barrier) - { - const u32 count = (*config.drawlist)[n] * indices_per_prim; - DrawIndexedPrimitive(p, count); - p += count; - ++n; - } - - for (; n < draw_list_size; n++) - { - vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier); - - const u32 count = (*config.drawlist)[n] * indices_per_prim; - DrawIndexedPrimitive(p, count); - p += count; - } - + DrawIndexedPrimitive(); return; } - if (m_features.texture_barrier && m_pipeline_selector.ps.IsFeedbackLoop()) +#ifdef PCSX2_DEVBUILD + if 
((one_barrier || full_barrier) && !m_pipeline_selector.ps.IsFeedbackLoop()) [[unlikely]] + Console.Warning("GS: Possible unnecessary barrier detected."); +#endif + + if (full_barrier) { const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt); + const u32 indices_per_prim = config.indices_per_prim; - if (config.require_full_barrier) + if (config.drawlist) { + GL_PUSH("Split the draw (SPRITE)"); + g_perfmon.Put( + GSPerfMon::Barriers, static_cast(config.drawlist->size()) - static_cast(skip_first_barrier)); + const u32 indices_per_prim = config.indices_per_prim; + const u32 draw_list_size = static_cast(config.drawlist->size()); + const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt); + u32 p = 0; + u32 n = 0; + if (skip_first_barrier) + { + const u32 count = (*config.drawlist)[n] * indices_per_prim; + DrawIndexedPrimitive(p, count); + p += count; + ++n; + } + + for (; n < draw_list_size; n++) + { + vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier); + + const u32 count = (*config.drawlist)[n] * indices_per_prim; + DrawIndexedPrimitive(p, count); + p += count; + } + } + else + { GL_PUSH("Split single draw in %d draw", config.nindices / indices_per_prim); g_perfmon.Put( GSPerfMon::Barriers, (config.nindices / indices_per_prim) - static_cast(skip_first_barrier)); @@ -6033,16 +6047,18 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, DrawIndexedPrimitive(p, indices_per_prim); } - - return; } - if (config.require_one_barrier && !skip_first_barrier) - { - g_perfmon.Put(GSPerfMon::Barriers, 1); - vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier); - } + return; + } + + if (one_barrier && !skip_first_barrier) + { + 
g_perfmon.Put(GSPerfMon::Barriers, 1); + + const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt); + vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier); } DrawIndexedPrimitive(); diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h index 0f7871a360ebf..6fff081482693 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h @@ -588,7 +588,8 @@ class GSDeviceVK final : public GSDevice void UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelector& pipe); void UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config); VkImageMemoryBarrier GetColorBufferBarrier(GSTextureVK* rt) const; - void SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, bool skip_first_barrier); + void SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, + bool one_barrier, bool full_barrier, bool skip_first_barrier); ////////////////////////////////////////////////////////////////////////// // Vulkan State