diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index ba18334f68521..f3afed1efae05 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -387,7 +387,7 @@ struct alignas(16) GSHWDrawConfig __fi bool IsFeedbackLoop() const { const u32 sw_blend_bits = blend_a | blend_b | blend_d; - const bool sw_blend_needs_rt = sw_blend_bits != 0 && ((sw_blend_bits | blend_c) & 1u); + const bool sw_blend_needs_rt = (sw_blend_bits != 0 && ((sw_blend_bits | blend_c) & 1u)) || ((a_masked & blend_c) != 0); return tex_is_fb || fbmask || (date > 0 && date != 3) || sw_blend_needs_rt; } @@ -688,7 +688,9 @@ struct alignas(16) GSHWDrawConfig struct AlphaPass { alignas(8) PSSelector ps; - bool enable; + bool enable : 1; + bool require_one_barrier : 1; + bool require_full_barrier : 1; ColorMaskSelector colormask; DepthStencilSelector depth; float ps_aref; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 6f313831bbdea..efaf1058b8e9b 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -5910,11 +5910,10 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta SetupIA(rtscale, sx, sy); - m_conf.alpha_second_pass.enable = ate_second_pass; - if (ate_second_pass) { pxAssert(!env.PABE.PABE); + std::memcpy(&m_conf.alpha_second_pass.ps, &m_conf.ps, sizeof(m_conf.ps)); std::memcpy(&m_conf.alpha_second_pass.colormask, &m_conf.colormask, sizeof(m_conf.colormask)); std::memcpy(&m_conf.alpha_second_pass.depth, &m_conf.depth, sizeof(m_conf.depth)); @@ -5958,6 +5957,8 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta r = g = b = a = false; } + m_conf.alpha_second_pass.enable = true; + if (z || r || g || b || a) { m_conf.alpha_second_pass.depth.zwe = z; @@ -5966,7 +5967,14 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta 
m_conf.alpha_second_pass.colormask.wb = b; m_conf.alpha_second_pass.colormask.wa = a; if (m_conf.alpha_second_pass.colormask.wrgba == 0) + { m_conf.alpha_second_pass.ps.DisableColorOutput(); + } + if (m_conf.alpha_second_pass.ps.IsFeedbackLoop()) + { + m_conf.alpha_second_pass.require_one_barrier = m_conf.require_one_barrier; + m_conf.alpha_second_pass.require_full_barrier = m_conf.require_full_barrier; + } } else { diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index 6625f35d32947..72ec313468f37 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -5750,8 +5750,13 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config) // We don't need the very first barrier if this is the first draw after switching to feedback loop, // because the layout change in itself enforces the execution dependency. HDR needs a barrier between // setup and the first draw to read it. TODO: Make HDR use subpasses instead. + + // However, it turns out *not* issuing that first barrier causes GPU resets on RDNA3, specifically with the Windows drivers. + // Despite the layout change enforcing the execution dependency between previous draws and the first + // input attachment read, the driver still wants the region/fragment-local barrier...
+ const bool skip_first_barrier = - (draw_rt && draw_rt->GetLayout() != GSTextureVK::Layout::FeedbackLoop && !pipe.ps.hdr); + (draw_rt && draw_rt->GetLayout() != GSTextureVK::Layout::FeedbackLoop && !pipe.ps.hdr && !IsDeviceAMD()); OMSetRenderTargets(draw_rt, draw_ds, config.scissor, static_cast(pipe.feedback_loop_flags)); if (pipe.IsRTFeedbackLoop()) @@ -5834,7 +5839,7 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config) // now we can do the actual draw if (BindDrawPipeline(pipe)) - SendHWDraw(config, draw_rt, skip_first_barrier); + SendHWDraw(config, draw_rt, config.require_one_barrier, config.require_full_barrier, skip_first_barrier); // blend second pass if (config.blend_second_pass.enable) @@ -5846,7 +5851,10 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config) pipe.ps.blend_hw = config.blend_second_pass.blend_hw; pipe.ps.dither = config.blend_second_pass.dither; if (BindDrawPipeline(pipe)) + { + // TODO: This probably should have barriers, in case we want to use it conditionally. DrawIndexedPrimitive(); + } } // and the alpha pass @@ -5864,7 +5872,10 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config) pipe.dss = config.alpha_second_pass.depth; pipe.bs = config.blend; if (BindDrawPipeline(pipe)) - SendHWDraw(config, draw_rt, false); + { + SendHWDraw(config, draw_rt, config.alpha_second_pass.require_one_barrier, + config.alpha_second_pass.require_full_barrier, false); + } } if (draw_rt_clone) @@ -5967,49 +5978,57 @@ VkImageMemoryBarrier GSDeviceVK::GetColorBufferBarrier(GSTextureVK* rt) const VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, rt->GetImage(), {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}}; } -void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, bool skip_first_barrier) +void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, + bool one_barrier, bool full_barrier, bool skip_first_barrier) { - if (config.drawlist) + if (!m_features.texture_barrier) [[unlikely]] { - GL_PUSH("Split the draw 
(SPRITE)"); - g_perfmon.Put( - GSPerfMon::Barriers, static_cast(config.drawlist->size()) - static_cast(skip_first_barrier)); - - const u32 indices_per_prim = config.indices_per_prim; - const u32 draw_list_size = static_cast(config.drawlist->size()); - const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt); - u32 p = 0; - u32 n = 0; - - if (skip_first_barrier) - { - const u32 count = (*config.drawlist)[n] * indices_per_prim; - DrawIndexedPrimitive(p, count); - p += count; - ++n; - } - - for (; n < draw_list_size; n++) - { - vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier); - - const u32 count = (*config.drawlist)[n] * indices_per_prim; - DrawIndexedPrimitive(p, count); - p += count; - } - + DrawIndexedPrimitive(); return; } - if (m_features.texture_barrier && m_pipeline_selector.ps.IsFeedbackLoop()) +#ifdef PCSX2_DEVBUILD + if ((one_barrier || full_barrier) && !m_pipeline_selector.ps.IsFeedbackLoop()) [[unlikely]] + Console.Warning("GS: Possible unnecessary barrier detected."); +#endif + + if (full_barrier) { const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt); + const u32 indices_per_prim = config.indices_per_prim; - if (config.require_full_barrier) + if (config.drawlist) { + GL_PUSH("Split the draw (SPRITE)"); + g_perfmon.Put( + GSPerfMon::Barriers, static_cast(config.drawlist->size()) - static_cast(skip_first_barrier)); + const u32 indices_per_prim = config.indices_per_prim; + const u32 draw_list_size = static_cast(config.drawlist->size()); + const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt); + u32 p = 0; + u32 n = 0; + + if (skip_first_barrier) + { + const u32 count = (*config.drawlist)[n] * indices_per_prim; + DrawIndexedPrimitive(p, count); + p += count; + ++n; + } + + for (; n < draw_list_size; n++) + { + vkCmdPipelineBarrier(GetCurrentCommandBuffer(), 
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier); + const u32 count = (*config.drawlist)[n] * indices_per_prim; + DrawIndexedPrimitive(p, count); + p += count; + } + } + else + { GL_PUSH("Split single draw in %d draw", config.nindices / indices_per_prim); g_perfmon.Put( GSPerfMon::Barriers, (config.nindices / indices_per_prim) - static_cast(skip_first_barrier)); @@ -6028,16 +6047,18 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, DrawIndexedPrimitive(p, indices_per_prim); } - - return; } - if (config.require_one_barrier && !skip_first_barrier) - { - g_perfmon.Put(GSPerfMon::Barriers, 1); - vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier); - } + return; + } + + if (one_barrier && !skip_first_barrier) + { + g_perfmon.Put(GSPerfMon::Barriers, 1); + + const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt); + vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier); } DrawIndexedPrimitive(); diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h index 8bfe12277f61b..6fff081482693 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h @@ -68,6 +68,9 @@ class GSDeviceVK final : public GSDevice /// Returns true if running on an NVIDIA GPU. __fi bool IsDeviceNVIDIA() const { return (m_device_properties.vendorID == 0x10DE); } + /// Returns true if running on an AMD GPU. + __fi bool IsDeviceAMD() const { return (m_device_properties.vendorID == 0x1002); } + // Creates a simple render pass. 
VkRenderPass GetRenderPass(VkFormat color_format, VkFormat depth_format, VkAttachmentLoadOp color_load_op = VK_ATTACHMENT_LOAD_OP_LOAD, @@ -585,7 +588,8 @@ class GSDeviceVK final : public GSDevice void UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelector& pipe); void UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config); VkImageMemoryBarrier GetColorBufferBarrier(GSTextureVK* rt) const; - void SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, bool skip_first_barrier); + void SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, + bool one_barrier, bool full_barrier, bool skip_first_barrier); ////////////////////////////////////////////////////////////////////////// // Vulkan State