From e1fba446c5fb62adfeab27eac40db4a26d122927 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Sat, 22 Oct 2022 11:43:29 +0100 Subject: [PATCH] GS-HW: SW Render CLUT draws --- .github/workflows/scripts/lint/gamedb/lint.py | 2 + bin/resources/GameIndex.yaml | 14 ++ pcsx2-qt/Settings/GraphicsSettingsWidget.cpp | 3 +- pcsx2-qt/Settings/GraphicsSettingsWidget.ui | 162 ++++++++++------- pcsx2/Config.h | 1 + pcsx2/Frontend/FullscreenUI.cpp | 3 + pcsx2/Frontend/ImGuiOverlays.cpp | 2 + pcsx2/GS/GS.cpp | 4 +- pcsx2/GS/GSClut.cpp | 56 +++--- pcsx2/GS/GSClut.h | 12 +- pcsx2/GS/GSLocalMemory.cpp | 1 + pcsx2/GS/GSLocalMemory.h | 20 +-- pcsx2/GS/GSState.cpp | 163 +++++++++++++----- pcsx2/GS/GSState.h | 2 + pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 122 +++++++++++++ pcsx2/GS/Renderers/HW/GSRendererHW.h | 2 + pcsx2/GameDatabase.cpp | 7 + pcsx2/GameDatabase.h | 1 + pcsx2/Pcsx2Config.cpp | 3 + 19 files changed, 429 insertions(+), 151 deletions(-) diff --git a/.github/workflows/scripts/lint/gamedb/lint.py b/.github/workflows/scripts/lint/gamedb/lint.py index 6f046173805853..8550011ccd0caf 100644 --- a/.github/workflows/scripts/lint/gamedb/lint.py +++ b/.github/workflows/scripts/lint/gamedb/lint.py @@ -62,6 +62,7 @@ "texturePreloading", "deinterlace", "cpuSpriteRenderBW", + "cpuCLUTRender", "gpuPaletteConversion", ] gs_hw_fix_ranges = { @@ -73,6 +74,7 @@ "roundSprite": (0, 2), "deinterlace": (0, 7), "cpuSpriteRenderBW": (1, 10), + "cpuCLUTRender": (1, 2), "gpuPaletteConversion": (0, 2), } allowed_speed_hacks = ["mvuFlagSpeedHack", "InstantVU1SpeedHack", "MTVUSpeedHack"] diff --git a/bin/resources/GameIndex.yaml b/bin/resources/GameIndex.yaml index 5f6f493f8a6a69..6f50190f5e3b30 100644 --- a/bin/resources/GameIndex.yaml +++ b/bin/resources/GameIndex.yaml @@ -16808,6 +16808,8 @@ SLES-53556: region: "PAL-M3" gameFixes: - BlitInternalFPSHack # Fixes internal FPS detection. + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes some bad textures. 
SLES-53557: name: "Need for Speed - Most Wanted" region: "PAL-E" @@ -17992,6 +17994,8 @@ SLES-54027: region: "PAL-M3" gameFixes: - BlitInternalFPSHack # Fixes internal FPS detection. + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes some bad textures. SLES-54030: name: "Black" region: "PAL-E" @@ -23003,6 +23007,8 @@ SLKA-25341: region: "NTSC-K" gameFixes: - BlitInternalFPSHack # Fixes internal FPS detection. + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes some bad textures. SLKA-25342: name: "Ryu ga Gotoku" region: "NTSC-K" @@ -31460,6 +31466,8 @@ SLPM-66567: region: "NTSC-J" gameFixes: - BlitInternalFPSHack # Fixes internal FPS detection. + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes some bad textures. SLPM-66568: name: "Brothers In Arms - Road to Hill 30 [Ubisoft Best]" region: "NTSC-J" @@ -44618,6 +44626,8 @@ SLUS-21271: compat: 5 gameFixes: - BlitInternalFPSHack # Fixes internal FPS detection. + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes some bad textures. SLUS-21272: name: "Super Monkey Ball Adventure" region: "NTSC-U" @@ -45342,6 +45352,8 @@ SLUS-21399: region: "NTSC-U" gameFixes: - BlitInternalFPSHack # Fixes internal FPS detection. + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes some bad textures. SLUS-21400: name: "Monster House" region: "NTSC-U" @@ -48460,6 +48472,8 @@ SLUS-29185: region: "NTSC-U" gameFixes: - BlitInternalFPSHack # Fixes internal FPS detection. + gsHWFixes: + cpuSpriteRenderBW: 2 # Fixes some bad textures. 
SLUS-29188: name: "Steambot Chronicles [Regular Demo]" region: "NTSC-U" diff --git a/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp b/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp index 021aabcf9629ec..09b3d29a34852c 100644 --- a/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp +++ b/pcsx2-qt/Settings/GraphicsSettingsWidget.cpp @@ -236,6 +236,7 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget* ////////////////////////////////////////////////////////////////////////// SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.halfScreenFix, "EmuCore/GS", "UserHacks_Half_Bottom_Override", -1, -1); SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.cpuSpriteRenderBW, "EmuCore/GS", "UserHacks_CPUSpriteRenderBW", 0); + SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.cpuCLUTRender, "EmuCore/GS", "UserHacks_CPUCLUTRender", 0); SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.skipDrawStart, "EmuCore/GS", "UserHacks_SkipDraw_Start", 0); SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.skipDrawEnd, "EmuCore/GS", "UserHacks_SkipDraw_End", 0); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.hwAutoFlush, "EmuCore/GS", "UserHacks_AutoFlush", false); @@ -334,7 +335,7 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget* if (!m_dialog->isPerGameSettings()) { m_ui.upscalingFixesLayout->removeRow(2); - m_ui.hardwareFixesLayout->removeRow(2); + m_ui.hardwareFixesLayout->removeRow(1); m_ui.skipDrawStart = nullptr; m_ui.skipDrawEnd = nullptr; m_ui.textureOffsetX = nullptr; diff --git a/pcsx2-qt/Settings/GraphicsSettingsWidget.ui b/pcsx2-qt/Settings/GraphicsSettingsWidget.ui index e91d0e8b3950ae..349cc0e62f788f 100644 --- a/pcsx2-qt/Settings/GraphicsSettingsWidget.ui +++ b/pcsx2-qt/Settings/GraphicsSettingsWidget.ui @@ -58,7 +58,7 @@ - 0 + 2 true @@ -388,7 +388,7 @@ - + @@ -692,14 +692,80 @@ - + + + + CPU Sprite Render Size: + + + + + + + + 0 (Disabled) + + + + + 1 (64 Max Width) + + + + + 2 (128 Max Width) + + 
+ + + 3 (192 Max Width) + + + + + 4 (256 Max Width) + + + + + 5 (320 Max Width) + + + + + 6 (384 Max Width) + + + + + 7 (448 Max Width) + + + + + 8 (512 Max Width) + + + + + 9 (576 Max Width) + + + + + 10 (640 Max Width) + + + + + Skipdraw Range: - + @@ -717,7 +783,7 @@ - + @@ -777,15 +843,14 @@ - - - - CPU Sprite Render Size: + + + + 0 (Disabled) + + + 0 - - - - 0 (Disabled) @@ -793,56 +858,23 @@ - 1 (64 Max Width) - - - - - 2 (128 Max Width) - - - - - 3 (192 Max Width) - - - - - 4 (256 Max Width) - - - - - 5 (320 Max Width) - - - - - 6 (384 Max Width) - - - - - 7 (448 Max Width) - - - - - 8 (512 Max Width) + 1 (Normal) - 9 (576 Max Width) - - - - - 10 (640 Max Width) + 2 (Aggressive) + + + + Software CLUT Render + + + @@ -1537,6 +1569,13 @@ Rendering + + + + Texture Filtering: + + + @@ -1573,10 +1612,10 @@ - - + + - Texture Filtering: + Extra Rendering Threads: @@ -1587,13 +1626,6 @@ - - - - Extra Rendering Threads: - - - diff --git a/pcsx2/Config.h b/pcsx2/Config.h index 4b93724d5330f2..5ea866bf86efbb 100644 --- a/pcsx2/Config.h +++ b/pcsx2/Config.h @@ -585,6 +585,7 @@ struct Pcsx2Config int UserHacks_TCOffsetX{0}; int UserHacks_TCOffsetY{0}; int UserHacks_CPUSpriteRenderBW{0}; + int UserHacks_CPUCLUTRender{ 0 }; TriFiltering TriFilter{TriFiltering::Automatic}; int OverrideTextureBarriers{-1}; int OverrideGeometryShaders{-1}; diff --git a/pcsx2/Frontend/FullscreenUI.cpp b/pcsx2/Frontend/FullscreenUI.cpp index 9d5d0cf120b0f1..79e328a8de35ac 100644 --- a/pcsx2/Frontend/FullscreenUI.cpp +++ b/pcsx2/Frontend/FullscreenUI.cpp @@ -2683,6 +2683,7 @@ void FullscreenUI::DrawGraphicsSettingsPage() static constexpr const char* s_cpu_sprite_render_bw_options[] = {"0 (Disabled)", "1 (64 Max Width)", "2 (128 Max Width)", "3 (192 Max Width)", "4 (256 Max Width)", "5 (320 Max Width)", "6 (384 Max Width)", "7 (448 Max Width)", "8 (512 Max Width)", "9 (576 Max Width)", "10 (640 Max Width)"}; + static constexpr const char* s_cpu_clut_render_options[] = { "0 (Disabled)", "1 
(Normal)", "2 (Aggressive)" }; static constexpr const char* s_half_pixel_offset_options[] = { "Off (Default)", "Normal (Vertex)", "Special (Texture)", "Special (Texture - Aggressive)"}; static constexpr const char* s_round_sprite_options[] = {"Off (Default)", "Half", "Full"}; @@ -2691,6 +2692,8 @@ void FullscreenUI::DrawGraphicsSettingsPage() "UserHacks_Half_Bottom_Override", -1, s_generic_options, std::size(s_generic_options), -1); DrawIntListSetting(bsi, "CPU Sprite Render Size", "Uses sofware renderer to draw texture decompression-like sprites.", "EmuCore/GS", "UserHacks_CPUSpriteRenderBW", 0, s_cpu_sprite_render_bw_options, std::size(s_cpu_sprite_render_bw_options)); + DrawIntListSetting(bsi, "Software CLUT Render", "Uses software renderer to draw texture CLUT points/sprites.", + "EmuCore/GS", "UserHacks_CPUCLUTRender", 0, s_cpu_clut_render_options, std::size(s_cpu_clut_render_options)); DrawIntRangeSetting( bsi, "Skip Draw Start", "Object range to skip drawing.", "EmuCore/GS", "UserHacks_SkipDraw_Start", 0, 0, 5000); DrawIntRangeSetting(bsi, "Skip Draw End", "Object range to skip drawing.", "EmuCore/GS", "UserHacks_SkipDraw_End", 0, 0, 5000); diff --git a/pcsx2/Frontend/ImGuiOverlays.cpp b/pcsx2/Frontend/ImGuiOverlays.cpp index 92cc5255372250..54787acea382d4 100644 --- a/pcsx2/Frontend/ImGuiOverlays.cpp +++ b/pcsx2/Frontend/ImGuiOverlays.cpp @@ -309,6 +309,8 @@ void ImGuiManager::DrawSettingsOverlay() APPEND("TCO={}/{} ", GSConfig.UserHacks_TCOffsetX, GSConfig.UserHacks_TCOffsetY); if (GSConfig.UserHacks_CPUSpriteRenderBW != 0) APPEND("CSBW={} ", GSConfig.UserHacks_CPUSpriteRenderBW); + if (GSConfig.UserHacks_CPUCLUTRender != 0) + APPEND("CCD={} ", GSConfig.UserHacks_CPUCLUTRender); if (GSConfig.SkipDrawStart != 0 || GSConfig.SkipDrawEnd != 0) APPEND("SD={}/{} ", GSConfig.SkipDrawStart, GSConfig.SkipDrawEnd); if (GSConfig.UserHacks_TextureInsideRt) diff --git a/pcsx2/GS/GS.cpp b/pcsx2/GS/GS.cpp index efac23be36387b..2bbe37e39cae10 100644 --- 
a/pcsx2/GS/GS.cpp +++ b/pcsx2/GS/GS.cpp @@ -834,7 +834,8 @@ void GSUpdateConfig(const Pcsx2Config::GSOptions& new_config) GSConfig.UserHacks_DisableDepthSupport != old_config.UserHacks_DisableDepthSupport || GSConfig.UserHacks_DisablePartialInvalidation != old_config.UserHacks_DisablePartialInvalidation || GSConfig.UserHacks_TextureInsideRt != old_config.UserHacks_TextureInsideRt || - GSConfig.UserHacks_CPUSpriteRenderBW != old_config.UserHacks_CPUSpriteRenderBW) + GSConfig.UserHacks_CPUSpriteRenderBW != old_config.UserHacks_CPUSpriteRenderBW || + GSConfig.UserHacks_CPUCLUTRender != old_config.UserHacks_CPUCLUTRender) { g_gs_renderer->PurgeTextureCache(); g_gs_renderer->PurgePool(); @@ -1512,6 +1513,7 @@ void GSApp::Init() m_default_configuration["UserHacks_Disable_Safe_Features"] = "0"; m_default_configuration["UserHacks_DisablePartialInvalidation"] = "0"; m_default_configuration["UserHacks_CPUSpriteRenderBW"] = "0"; + m_default_configuration["UserHacks_CPUCLUTRender"] = "0"; m_default_configuration["UserHacks_CPU_FB_Conversion"] = "0"; m_default_configuration["UserHacks_Half_Bottom_Override"] = "-1"; m_default_configuration["UserHacks_HalfPixelOffset"] = "0"; diff --git a/pcsx2/GS/GSClut.cpp b/pcsx2/GS/GSClut.cpp index 02b4158dd60e60..96573c0a97825a 100644 --- a/pcsx2/GS/GSClut.cpp +++ b/pcsx2/GS/GSClut.cpp @@ -28,7 +28,7 @@ GSClut::GSClut(GSLocalMemory* mem) m_clut = (u16*)&p[0]; // 1k + 1k for mirrored area simulating wrapping memory m_buff32 = (u32*)&p[2048]; // 1k m_buff64 = (u64*)&p[4096]; // 2k - m_write.dirty = true; + m_write.dirty = 1; m_read.dirty = true; for (int i = 0; i < 16; i++) @@ -103,34 +103,40 @@ GSClut::~GSClut() vmfree(m_clut, CLUT_ALLOC_SIZE); } -void GSClut::Invalidate() +u8 GSClut::IsInvalid() { - m_write.dirty = true; + return m_write.dirty; } -void GSClut::InvalidateRange(u32 start_block, u32 end_block) +u32 GSClut::GetCLUTCBP() { - u32 blocks = 4; - - if (GSLocalMemory::m_psm[m_write.TEX0.CPSM].bpp == 16) - blocks >>= 1; + return 
m_write.TEX0.CBP; +} - if (GSLocalMemory::m_psm[m_write.TEX0.PSM].bpp == 4) - blocks >>= 1; +void GSClut::SetNextCLUTCBP(u64 CBP) +{ + m_write.next_cpb = CBP; +} - if ((m_write.TEX0.CBP + blocks) >= start_block && m_write.TEX0.CBP <= end_block) - { - m_write.dirty = true; - } +u64 GSClut::GetNextCLUTCBP() +{ + return m_write.next_cpb; } -// Check the whole page, if the CLUT is slightly offset from a page boundary it could miss it. -void GSClut::Invalidate(u32 block) +bool GSClut::InvalidateRange(u32 start_block, u32 end_block, bool is_draw) { - if (!((block ^ m_write.TEX0.CBP) & ~0x1F)) + if (m_write.dirty) + return m_write.dirty; + + GIFRegTEX0 next_cbp; + next_cbp.U64 = m_write.next_cpb; + + if ((next_cbp.CBP + 4) >= start_block && end_block >= next_cbp.CBP) { - m_write.dirty = true; + m_write.dirty |= is_draw ? 2 : 1; } + + return m_write.dirty; } bool GSClut::WriteTest(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) @@ -163,14 +169,14 @@ bool GSClut::WriteTest(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) m_CBP[1] = TEX0.CBP; break; case 6: - return false; // ffx2 menu + return false; // ffx2 menu. case 7: - return false; // ford mustang racing // Bouken Jidai Katsugeki Goemon + return false; // ford mustang racing // Bouken Jidai Katsugeki Goemon. default: __assume(0); } - // CLUT only reloads if PSM is a valid index type, avoid unnecessary flushes + // CLUT only reloads if PSM is a valid index type, avoid unnecessary flushes. 
return m_write.IsDirty(TEX0, TEXCLUT); } @@ -179,7 +185,7 @@ void GSClut::Write(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT) m_write.TEX0 = TEX0; m_write.TEXCLUT = TEXCLUT; m_read.dirty = true; - m_write.dirty = false; + m_write.dirty = 0; (this->*m_wc[TEX0.CSM][TEX0.CPSM][TEX0.PSM])(TEX0, TEXCLUT); } @@ -775,7 +781,7 @@ bool GSClut::WriteState::IsDirty(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TE bool is_dirty = dirty; - if (((this->TEX0.U64 ^ TEX0.U64) & mask) || (GSLocalMemory::m_psm[this->TEX0.PSM].bpp != GSLocalMemory::m_psm[TEX0.PSM].bpp)) + if (((this->TEX0.U64 ^ TEX0.U64) & mask) || (GSLocalMemory::m_psm[this->TEX0.PSM].pal != GSLocalMemory::m_psm[TEX0.PSM].pal)) is_dirty |= true; else if (TEX0.CSM == 1 && (TEXCLUT.U32[0] ^ this->TEXCLUT.U32[0])) is_dirty |= true; @@ -795,7 +801,7 @@ bool GSClut::ReadState::IsDirty(const GIFRegTEX0& TEX0) bool is_dirty = dirty; - if (((this->TEX0.U64 ^ TEX0.U64) & mask) || (GSLocalMemory::m_psm[this->TEX0.PSM].bpp != GSLocalMemory::m_psm[TEX0.PSM].bpp)) + if (((this->TEX0.U64 ^ TEX0.U64) & mask) || (GSLocalMemory::m_psm[this->TEX0.PSM].pal != GSLocalMemory::m_psm[TEX0.PSM].pal)) is_dirty |= true; if (!is_dirty) @@ -814,7 +820,7 @@ bool GSClut::ReadState::IsDirty(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) bool is_dirty = dirty; - if (((this->TEX0.U64 ^ TEX0.U64) & tex0_mask) || (GSLocalMemory::m_psm[this->TEX0.PSM].bpp != GSLocalMemory::m_psm[TEX0.PSM].bpp)) + if (((this->TEX0.U64 ^ TEX0.U64) & tex0_mask) || (GSLocalMemory::m_psm[this->TEX0.PSM].pal != GSLocalMemory::m_psm[TEX0.PSM].pal)) is_dirty |= true; else // Just to optimise the checks. 
{ diff --git a/pcsx2/GS/GSClut.h b/pcsx2/GS/GSClut.h index 24fe06572858d2..404cf52eda6825 100644 --- a/pcsx2/GS/GSClut.h +++ b/pcsx2/GS/GSClut.h @@ -39,7 +39,8 @@ class alignas(32) GSClut : public GSAlignedClass<32> { GIFRegTEX0 TEX0; GIFRegTEXCLUT TEXCLUT; - bool dirty; + u8 dirty; + u64 next_cpb; bool IsDirty(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); } m_write; @@ -100,9 +101,12 @@ class alignas(32) GSClut : public GSAlignedClass<32> GSClut(GSLocalMemory* mem); virtual ~GSClut(); - void Invalidate(); - void Invalidate(u32 block); - void InvalidateRange(u32 start_block, u32 end_block); + bool InvalidateRange(u32 start_block, u32 end_block, bool is_draw = false); + u8 IsInvalid(); + void DrawCLUT(); + u32 GetCLUTCBP(); + void SetNextCLUTCBP(u64 CBP); + u64 GetNextCLUTCBP(); bool WriteTest(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); void Write(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT); //void Read(const GIFRegTEX0& TEX0); diff --git a/pcsx2/GS/GSLocalMemory.cpp b/pcsx2/GS/GSLocalMemory.cpp index 5e1efbd96e6419..b2bfea063718a6 100644 --- a/pcsx2/GS/GSLocalMemory.cpp +++ b/pcsx2/GS/GSLocalMemory.cpp @@ -84,6 +84,7 @@ GSLocalMemory::GSLocalMemory() for (psm_t& psm : m_psm) { psm.info = GSLocalMemory::swizzle32; + psm.pa = &GSLocalMemory::GetAddress32; psm.rp = &GSLocalMemory::ReadPixel32; psm.rpa = &GSLocalMemory::ReadPixel32; psm.wp = &GSLocalMemory::WritePixel32; diff --git a/pcsx2/GS/GSLocalMemory.h b/pcsx2/GS/GSLocalMemory.h index c76c1508035911..85a894f165b15b 100644 --- a/pcsx2/GS/GSLocalMemory.h +++ b/pcsx2/GS/GSLocalMemory.h @@ -444,6 +444,7 @@ class GSLocalMemory : public GSAlignedClass<32> typedef u32 (GSLocalMemory::*readTexel)(int x, int y, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA) const; typedef void (GSLocalMemory::*writePixelAddr)(u32 addr, u32 c); typedef void (GSLocalMemory::*writeFrameAddr)(u32 addr, u32 c); + typedef u32(GSLocalMemory::*PixelAddr)(int x, int y, u32 bp, u32 bw) const; typedef u32 
(GSLocalMemory::*readPixelAddr)(u32 addr) const; typedef u32 (GSLocalMemory::*readTexelAddr)(u32 addr, const GIFRegTEXA& TEXA) const; typedef void (GSLocalMemory::*writeImage)(int& tx, int& ty, const u8* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG); @@ -456,6 +457,7 @@ class GSLocalMemory : public GSAlignedClass<32> GSSwizzleInfo info; readPixel rp; readPixelAddr rpa; + PixelAddr pa; writePixel wp; writePixelAddr wpa; readTexel rt; @@ -531,17 +533,6 @@ class GSLocalMemory : public GSAlignedClass<32> GSPixelOffset4* GetPixelOffset4(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF); std::vector* GetPage2TileMap(const GIFRegTEX0& TEX0); - static u32 GetEndBlock(int bp, int bw, int w, int h, int psm) - { - const GSLocalMemory::psm_t& dpsm = GSLocalMemory::m_psm[psm]; - const int page_width = std::max(1, w / dpsm.pgs.x); - const int page_height = std::max(1, h / dpsm.pgs.y); - const int pitch = (std::max(1, bw) * 64) / dpsm.pgs.x; - const u32 end_bp = bp + ((((page_height % dpsm.pgs.y) != 0) ? (page_width << 5) : 0) + ((page_height * pitch) << 5)); - - return end_bp; - } - // address static u32 BlockNumber32(int x, int y, u32 bp, u32 bw) @@ -677,6 +668,13 @@ class GSLocalMemory : public GSAlignedClass<32> return swizzle16SZ.pa(x, y, bp, bw); } + // Direct address for CLUT invalidation. 
+ + __forceinline u32 GetAddress32(int x, int y, u32 bp, u32 bw) const + { + return PixelAddress32(x, y, bp, bw); + } + // pixel R/W __forceinline u32 ReadPixel32(u32 addr) const diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index f88e06bc16eaed..d0c5cab177e225 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -777,16 +777,8 @@ __inline void GSState::CheckFlushes() if (m_dirty_gs_regs && m_index.tail > 0) { if (TestDrawChanged()) - { Flush(GSFlushReason::CONTEXTCHANGE); - } } - if ((m_context->FRAME.FBMSK & GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk) != GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk) - m_mem.m_clut.Invalidate(m_context->FRAME.Block()); - - // Hey, why not check? I mean devs have done crazier things.. - if(!m_context->ZBUF.ZMSK) - m_mem.m_clut.Invalidate(m_context->ZBUF.Block()); } void GSState::GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r) @@ -1073,13 +1065,14 @@ void GSState::ApplyTEX0(GIFRegTEX0& TEX0) GL_REG("Apply TEX0_%d = 0x%x_%x", i, TEX0.U32[1], TEX0.U32[0]); - // even if TEX0 did not change, a new palette may have been uploaded and will overwrite the currently queued for drawing + // Even if TEX0 did not change, a new palette may have been uploaded and will overwrite the currently queued for drawing. 
const bool wt = m_mem.m_clut.WriteTest(TEX0, m_env.TEXCLUT); - // clut loading already covered with WriteTest, for drawing only have to check CPSM and CSA (MGS3 intro skybox would be drawn piece by piece without this) - if (wt) + { + m_mem.m_clut.SetNextCLUTCBP(TEX0.U64); Flush(GSFlushReason::CLUTCHANGE); + } TEX0.CPSM &= 0xa; // 1010b @@ -1097,7 +1090,7 @@ void GSState::ApplyTEX0(GIFRegTEX0& TEX0) { BITBLTBUF.SBP = TEX0.CBP; BITBLTBUF.SBW = 1; - BITBLTBUF.SPSM = TEX0.CSM; + BITBLTBUF.SPSM = TEX0.CPSM; r.left = 0; r.top = 0; @@ -1106,12 +1099,13 @@ void GSState::ApplyTEX0(GIFRegTEX0& TEX0) int blocks = 4; - if (GSLocalMemory::m_psm[TEX0.CPSM].bpp == 16) + if (GSLocalMemory::m_psm[TEX0.CPSM].trbpp == 16) blocks >>= 1; - if (GSLocalMemory::m_psm[TEX0.PSM].bpp == 4) + if (GSLocalMemory::m_psm[TEX0.PSM].trbpp == 4) blocks >>= 1; + // Invalidating videomem is slow, so *only* do it when it's definitely a CLUT draw in HW mode. for (int j = 0; j < blocks; j++, BITBLTBUF.SBP++) InvalidateLocalMem(BITBLTBUF, r, true); } @@ -1119,7 +1113,7 @@ void GSState::ApplyTEX0(GIFRegTEX0& TEX0) { BITBLTBUF.SBP = TEX0.CBP; BITBLTBUF.SBW = m_env.TEXCLUT.CBW; - BITBLTBUF.SPSM = TEX0.CSM; + BITBLTBUF.SPSM = TEX0.CPSM; r.left = m_env.TEXCLUT.COU; r.top = m_env.TEXCLUT.COV; @@ -1148,8 +1142,6 @@ void GSState::GIFRegHandlerTEX0(const GIFReg* RESTRICT r) GL_REG("TEX0_%d = 0x%x_%x", i, r->U32[1], r->U32[0]); GIFRegTEX0 TEX0 = r->TEX0; - GIFRegMIPTBP1 temp_MIPTBP1; - bool MTBAReloaded = false; // Max allowed MTBA size for 32bit swizzled textures (including 8H 4HL etc) is 512, 16bit and normal 8/4bit formats can be 1024 const u32 maxTex = (GSLocalMemory::m_psm[TEX0.PSM].bpp < 32) ? 
10 : 9; @@ -1174,6 +1166,7 @@ void GSState::GIFRegHandlerTEX0(const GIFReg* RESTRICT r) // Format must be a color, Z formats do not trigger MTBA (but are valid for Mipmapping) if (m_env.CTXT[i].TEX1.MTBA && TEX0.TW >= 5 && TEX0.TW <= maxTex && (TEX0.PSM & 0x30) != 0x30) { + GIFRegMIPTBP1& mip_tbp1 = m_env.CTXT[i].MIPTBP1; // NOTE 1: TEX1.MXL must not be automatically set to 3 here and it has no effect on MTBA. // NOTE 2: Mipmap levels are packed with a minimum distance between them of 1 block, even down at 4bit textures under 16x16. // NOTE 3: Everything is derrived from the width of the texture, TBW and TH are completely ignored (useful for handling non-rectangular ones) @@ -1190,39 +1183,32 @@ void GSState::GIFRegHandlerTEX0(const GIFReg* RESTRICT r) bw = std::max(bw >> 1, 1); tex_size = std::max(tex_size >> 2, 1); - temp_MIPTBP1.TBP1 = bp; - temp_MIPTBP1.TBW1 = bw; + mip_tbp1.TBP1 = bp; + mip_tbp1.TBW1 = bw; bp += tex_size; bw = std::max(bw >> 1, 1); tex_size = std::max(tex_size >> 2, 1); - temp_MIPTBP1.TBP2 = bp; - temp_MIPTBP1.TBW2 = bw; + mip_tbp1.TBP2 = bp; + mip_tbp1.TBW2 = bw; bp += tex_size; bw = std::max(bw >> 1, 1); - temp_MIPTBP1.TBP3 = bp; - temp_MIPTBP1.TBW3 = bw; - - MTBAReloaded = true; - } - - ApplyTEX0(TEX0); - - if (MTBAReloaded) - { - m_env.CTXT[i].MIPTBP1 = temp_MIPTBP1; + mip_tbp1.TBP3 = bp; + mip_tbp1.TBW3 = bw; if (i == m_prev_env.PRIM.CTXT) { - if (m_prev_env.CTXT[i].MIPTBP1.U64 ^ m_env.CTXT[i].MIPTBP1.U64) + if (m_prev_env.CTXT[i].MIPTBP1.U64 ^ mip_tbp1.U64) m_dirty_gs_regs |= (1 << DIRTY_REG_MIPTBP1); else m_dirty_gs_regs &= ~(1 << DIRTY_REG_MIPTBP1); } } + + ApplyTEX0(TEX0); } template @@ -2009,16 +1995,19 @@ void GSState::Write(const u8* mem, int len) GIFRegTEX0& prev_tex0 = m_prev_env.CTXT[m_prev_env.PRIM.CTXT].TEX0; - const u32 write_end_bp = GSLocalMemory::GetEndBlock(blit.DBP, blit.DBW, w + static_cast(m_env.TRXPOS.DSAX), h + static_cast(m_env.TRXPOS.DSAY), blit.DPSM); - const u32 tex_end_bp = 
GSLocalMemory::GetEndBlock(prev_tex0.TBP0, prev_tex0.TBW, 1 << prev_tex0.TW, 1 << prev_tex0.TH, prev_tex0.PSM); + const GSLocalMemory::psm_t& tex_psm = GSLocalMemory::m_psm[prev_tex0.PSM]; + + const u32 write_start_bp = (m_mem.*psm.pa)(static_cast(m_env.TRXPOS.DSAX), static_cast(m_env.TRXPOS.DSAY), blit.DBP, blit.DBW) >> 6; + const u32 write_end_bp = (m_mem.*psm.pa)(w + static_cast(m_env.TRXPOS.DSAX) - 1, h + static_cast(m_env.TRXPOS.DSAY) - 1, blit.DBP, blit.DBW) >> 6; + const u32 tex_end_bp = (m_mem.*tex_psm.pa)((1 << prev_tex0.TW) - 1, (1 << prev_tex0.TH) - 1, prev_tex0.TBP0, prev_tex0.TBW) >> 6; // Only flush on a NEW transfer if a pending one is using the same address or overlap. // Check Fast & Furious (Hardare mode) and Assault Suits Valken (either renderer) and Tomb Raider - Angel of Darkness menu (TBP != DBP but overlaps). - if (m_tr.end == 0 && m_index.tail > 0 && m_prev_env.PRIM.TME && write_end_bp >= prev_tex0.TBP0 && blit.DBP <= tex_end_bp) + if (m_tr.end == 0 && m_index.tail > 0 && m_prev_env.PRIM.TME && write_end_bp > prev_tex0.TBP0 && write_start_bp <= tex_end_bp) { Flush(GSFlushReason::UPLOADDIRTYTEX); } // Invalid the CLUT if it crosses paths. - m_mem.m_clut.InvalidateRange(blit.DBP, write_end_bp); + m_mem.m_clut.InvalidateRange(write_start_bp, write_end_bp); GL_CACHE("Write! ... 
=> 0x%x W:%d F:%s (DIR %d%d), dPos(%d %d) size(%d %d)", blit.DBP, blit.DBW, psm_str(blit.DPSM), @@ -2156,17 +2145,20 @@ void GSState::Move() GIFRegTEX0& prev_tex0 = m_prev_env.CTXT[m_prev_env.PRIM.CTXT].TEX0; - const u32 end_bp = GSLocalMemory::GetEndBlock(dbp, dbw, w + static_cast(m_env.TRXPOS.DSAX), h + static_cast(m_env.TRXPOS.DSAY), m_env.BITBLTBUF.DPSM); - const u32 tex_end_bp = GSLocalMemory::GetEndBlock(prev_tex0.TBP0, prev_tex0.TBW, 1 << prev_tex0.TW, 1 << prev_tex0.TH, prev_tex0.PSM); + const GSLocalMemory::psm_t& tex_psm = GSLocalMemory::m_psm[prev_tex0.PSM]; + const u32 write_start_bp = (m_mem.*dpsm.pa)(static_cast(m_env.TRXPOS.DSAX), static_cast(m_env.TRXPOS.DSAY), dbp, dbw) >> 6; + const u32 write_end_bp = (m_mem.*dpsm.pa)(w + static_cast(m_env.TRXPOS.DSAX) - 1, h + static_cast(m_env.TRXPOS.DSAY) - 1, dbp, dbw) >> 6; + const u32 tex_end_bp = (m_mem.*tex_psm.pa)((1 << prev_tex0.TW) - 1, (1 << prev_tex0.TH) - 1, prev_tex0.TBP0, prev_tex0.TBW) >> 6; // Only flush on a NEW transfer if a pending one is using the same address or overlap. // Unknown if games use this one, but best to be safe. - if (m_index.tail > 0 && m_prev_env.PRIM.TME && end_bp >= prev_tex0.TBP0 && dbp <= static_cast(tex_end_bp)) + + if (m_index.tail > 0 && m_prev_env.PRIM.TME && write_end_bp >= prev_tex0.TBP0 && write_start_bp <= tex_end_bp) { Flush(GSFlushReason::LOCALTOLOCALMOVE); } // Invalid the CLUT if it crosses paths. 
- m_mem.m_clut.InvalidateRange(dbp, end_bp); + m_mem.m_clut.InvalidateRange(write_start_bp, write_end_bp); auto genericCopy = [=](const GSOffset& dpo, const GSOffset& spo, auto&& getPAHelper, auto&& pxCopyFn) { @@ -2957,19 +2949,69 @@ GSState::PRIM_OVERLAP GSState::PrimitiveOverlap() __forceinline bool GSState::IsAutoFlushDraw() { + if (!PRIM->TME) + return false; + const u32 frame_mask = GSLocalMemory::m_psm[m_context->TEX0.PSM].fmsk; - const bool frame_hit = (m_context->FRAME.Block() == m_context->TEX0.TBP0) && !(m_context->TEST.ATE && m_context->TEST.ATST == 0 && m_context->TEST.AFAIL == 2) && ((m_context->FRAME.FBMSK & frame_mask) != frame_mask); + const bool frame_hit = m_context->FRAME.Block() == m_context->TEX0.TBP0 && !(m_context->TEST.ATE && m_context->TEST.ATST == 0 && m_context->TEST.AFAIL == 2) && ((m_context->FRAME.FBMSK & frame_mask) != frame_mask); // There's a strange behaviour we need to test on a PS2 here, if the FRAME is a Z format, like Powerdrome something swaps over, and it seems Alpha Fail of "FB Only" writes to the Z.. it's odd. const bool zbuf_hit = (m_context->ZBUF.Block() == m_context->TEX0.TBP0) && !(m_context->TEST.ATE && m_context->TEST.ATST == 0 && m_context->TEST.AFAIL != 2) && !m_context->ZBUF.ZMSK; const u32 frame_z_psm = frame_hit ? m_context->FRAME.PSM : m_context->ZBUF.PSM; const u32 frame_z_bp = frame_hit ? 
m_context->FRAME.Block() : m_context->ZBUF.Block(); - if (PRIM->TME && (frame_hit || zbuf_hit) && GSUtil::HasSharedBits(frame_z_bp, frame_z_psm, m_context->TEX0.TBP0, m_context->TEX0.PSM)) + if ((frame_hit || zbuf_hit) && GSUtil::HasSharedBits(frame_z_bp, frame_z_psm, m_context->TEX0.TBP0, m_context->TEX0.PSM)) return true; return false; } +__forceinline void GSState::CLUTAutoFlush() +{ + if (m_mem.m_clut.IsInvalid() & 2) + return; + + int n = 1; + + switch (PRIM->PRIM) + { + case GS_POINTLIST: + n = 1; + break; + case GS_LINELIST: + case GS_LINESTRIP: + case GS_SPRITE: + n = 2; + break; + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + n = 3; + break; + case GS_TRIANGLEFAN: + n = 3; + break; + case GS_INVALID: + default: + break; + } + + if ((m_index.tail > 0 || (m_vertex.tail == n-1)) && (GSLocalMemory::m_psm[m_context->TEX0.PSM].pal == 0 || !PRIM->TME)) + { + if ((m_context->FRAME.FBMSK & GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk) != GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk) + { + const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_context->FRAME.PSM]; + const u32 startbp = (m_mem.*psm.pa)(temp_draw_rect.x, temp_draw_rect.y, m_context->FRAME.Block(), m_context->FRAME.FBW) >> 6; + + // If it's a point, then we only have one coord, so the address for start and end will be the same, which is bad for the following check. + u32 endbp = startbp + 1; + // otherwise calculate the end. 
+ if (PRIM->PRIM != GS_POINTLIST || (m_index.tail > 1)) + endbp = (m_mem.*psm.pa)(temp_draw_rect.z, temp_draw_rect.w, m_context->FRAME.Block(), m_context->FRAME.FBW) >> 6; + + m_mem.m_clut.InvalidateRange(startbp, endbp, true); + } + } +} + __forceinline void GSState::HandleAutoFlush() { // Kind of a cheat, making the assumption that 2 consecutive fan/strip triangles won't overlap each other (*should* be safe) @@ -3190,7 +3232,7 @@ __forceinline void GSState::VertexKick(u32 skip) ASSERT(m_vertex.tail < m_vertex.maxcount + 3); - if (auto_flush && m_index.tail > 0 && ((m_vertex.tail + 1) - m_vertex.head) >= n) + if (auto_flush && skip == 0 && m_index.tail > 0 && ((m_vertex.tail + 1) - m_vertex.head) >= n) { HandleAutoFlush(); } @@ -3403,6 +3445,39 @@ __forceinline void GSState::VertexKick(u32 skip) default: __assume(0); } + + + + GSVector4i draw_coord; + const GSVector2i offset = GSVector2i(m_context->XYOFFSET.OFX, m_context->XYOFFSET.OFY); + + for (int i = 0; i < n; i++) + { + const GSVertex* v = &m_vertex.buff[m_index.buff[(m_index.tail - n) + i]]; + draw_coord.x = (static_cast(v->XYZ.X) - offset.x) >> 4; + draw_coord.y = (static_cast(v->XYZ.Y) - offset.y) >> 4; + + if (m_vertex.tail == n && i == 0) + { + const GSVector4i scissor = GSVector4i(m_context->scissor.in); + + temp_draw_rect.x = draw_coord.x; + temp_draw_rect.y = draw_coord.y; + temp_draw_rect = temp_draw_rect.xyxy(); + } + else + { + temp_draw_rect.x = std::min(draw_coord.x, temp_draw_rect.x); + temp_draw_rect.y = std::min(draw_coord.y, temp_draw_rect.y); + temp_draw_rect.z = std::max(draw_coord.x, temp_draw_rect.z); + temp_draw_rect.w = std::max(draw_coord.y, temp_draw_rect.w); + } + } + + const GSVector4i scissor = GSVector4i(m_context->scissor.in); + temp_draw_rect.rintersect(scissor); + + CLUTAutoFlush(); } /// Checks if region repeat is used (applying it does something to at least one of the values in min...max) diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h index 760397e2f989f4..a01194dcffc635 
100644 --- a/pcsx2/GS/GSState.h +++ b/pcsx2/GS/GSState.h @@ -186,6 +186,7 @@ class GSState : public GSAlignedClass<32> void GrowVertexBuffer(); bool IsAutoFlushDraw(); void HandleAutoFlush(); + void CLUTAutoFlush(); template void VertexKick(u32 skip); @@ -228,6 +229,7 @@ class GSState : public GSAlignedClass<32> GSDrawingEnvironment m_env; GSDrawingEnvironment m_backup_env; GSDrawingEnvironment m_prev_env; + GSVector4i temp_draw_rect; GSDrawingContext* m_context; u32 m_crc; CRC::Game m_game; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index 5660bb6bcd74d0..dd5da5b9671e02 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -1275,6 +1275,7 @@ void GSRendererHW::Draw() s = StringUtil::StdStringFromFormat("%05d_vertex.txt", s_n); DumpVertices(m_dump_root + s); } + if (IsBadFrame()) { GL_INS("Warning skipping a draw call (%d)", s_n); @@ -1390,6 +1391,20 @@ void GSRendererHW::Draw() return; } + // SW CLUT Render enable. + if (GSConfig.UserHacks_CPUCLUTRender > 0) + { + bool result = (GSConfig.UserHacks_CPUCLUTRender == 1) ? PossibleCLUTDraw() : PossibleCLUTDrawAggressive(); + if (result) + { + if (SwPrimRender()) + { + GL_CACHE("Possible clut draw, drawn with SwPrimRender()"); + return; + } + } + } + if (m_channel_shuffle) { m_channel_shuffle = draw_sprite_tex && (m_context->TEX0.PSM == PSM_PSMT8) && single_page; @@ -3879,6 +3894,113 @@ void GSRendererHW::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc g_gs_device->RenderHW(m_conf); } +bool GSRendererHW::PossibleCLUTDraw() +{ + if (m_channel_shuffle || m_texture_shuffle) + return false; + + // Hopefully no games draw a CLUT with a CLUT, that would be evil, most likely a channel shuffle. + if (PRIM->TME && GSLocalMemory::m_psm[m_context->TEX0.PSM].pal > 0) + return false; + + // Keep the draws simple, no alpha testing, blending, mipmapping, Z writes, and make sure it's flat. 
+ const bool fb_only = m_context->TEST.ATE && m_context->TEST.AFAIL == 1 && m_context->TEST.ATST == ATST_NEVER; + + if (!m_context->ZBUF.ZMSK && !fb_only) + return false; + + if (m_vt.m_eq.z != 0x1) + return false; + + if (m_context->TEX1.MXL) + return false; + + const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_context->FRAME.PSM]; + + // Where the draw starts and ends. + const u32 start_bp = (m_mem.*psm.pa)(m_vt.m_min.p.x, m_vt.m_min.p.y, m_context->FRAME.Block(), m_context->FRAME.FBW) >> 6; + const u32 end_bp = (m_mem.*psm.pa)(m_vt.m_max.p.x, m_vt.m_max.p.y, m_context->FRAME.Block(), m_context->FRAME.FBW) >> 6; + + GIFRegTEX0 next_clut_tex0; + next_clut_tex0.U64 = m_mem.m_clut.GetNextCLUTCBP(); + + bool next_clut_match = ((next_clut_tex0.CBP + 4) >= start_bp && end_bp >= next_clut_tex0.CBP) && (m_state_flush_reason & CLUTCHANGE); + + // Max size for a CLUT/Current page size. + constexpr float clut_width = 16.0f; + constexpr float clut_height = 16.0f; + const float page_width = static_cast<float>(GSLocalMemory::m_psm[m_context->FRAME.PSM].pgs.x); + const float page_height = static_cast<float>(GSLocalMemory::m_psm[m_context->FRAME.PSM].pgs.y); + + // Special case for games replacing the alpha only like Minority Report and NFS HP2 (The busted screen), Klonoa 2 pipe monster shadow CLUT. + bool valid_size = ((m_vt.m_max.p.x - m_vt.m_min.p.x) <= clut_width && (m_vt.m_max.p.y - m_vt.m_min.p.y) <= clut_height && (m_vt.m_primclass == GS_POINT_CLASS || m_vt.m_primclass == GS_LINE_CLASS || m_vt.m_primclass == GS_SPRITE_CLASS)) || (m_vt.m_max.p.x == page_width && m_vt.m_max.p.y == page_height); + + + if (!(next_clut_match || valid_size)) + return false; + + // Make sure it looks like a clut draw, can be anywhere in the page. + if (!((m_vt.m_max.p.x <= page_width && m_vt.m_max.p.y <= page_height) && valid_size)) + return false; + + // Make sure the draw hits the next CLUT and it's marked as invalid (kind of a sanity check).
+ // Possibly shouldn't have the invalid check, but without it NFS HP2 busted screen is... busted. + // Used to make sure it was either Point, Line or Sprite, but Klonoa 2 decided that drawing a CLUT with triangles was a good idea. Seems ok though. + if (!(m_mem.m_clut.IsInvalid() & 2) && !next_clut_match && !valid_size) + { + return false; + } + + if (PRIM->TME) + { + // If we're using a texture to draw our CLUT/whatever, we need the GPU to write back dirty data we need. + const GSVector4i r = GetTextureMinMax(m_context->TEX0, m_context->CLAMP, m_vt.IsLinear()).coverage; + + GIFRegBITBLTBUF BITBLTBUF; + BITBLTBUF.SBP = m_context->TEX0.TBP0; + BITBLTBUF.SBW = m_context->TEX0.TBW; + BITBLTBUF.SPSM = m_context->TEX0.PSM; + + InvalidateLocalMem(BITBLTBUF, r); + } + + return true; +} + +// Slightly more aggressive version that kinda YOLOs it if the draw is anywhere near the CLUT or is point/line (providing it's not too wide of a draw and a few other parameters). +// This is pretty much tuned for the Sega Model 2 games, which draw a huge gradient, then pick lines out of it to make up CLUTs for about 4000 draws... +bool GSRendererHW::PossibleCLUTDrawAggressive() +{ + // Avoid any shuffles. + if (m_channel_shuffle || m_texture_shuffle) + return false; + + // Keep the draws simple, no alpha testing, blending, mipmapping, Z writes, and make sure it's flat. + if (m_context->TEST.ATE) + return false; + + if (PRIM->ABE) + return false; + + if (m_context->TEX1.MXL) + return false; + + if (m_context->FRAME.FBW != 1) + return false; + + if (!m_context->ZBUF.ZMSK) + return false; + + if (m_vt.m_eq.z != 0x1) + return false; + + if (!((m_vt.m_primclass == GS_POINT_CLASS || m_vt.m_primclass == GS_LINE_CLASS) || ((m_mem.m_clut.GetCLUTCBP() >> 5) >= m_context->FRAME.FBP && (m_context->FRAME.FBP + 1) >= (m_mem.m_clut.GetCLUTCBP() >> 5) && m_vt.m_primclass == GS_SPRITE_CLASS))) + return false; + + // Avoid invalidating anything here, we just want to avoid the thing being drawn on the GPU.
+ return true; +} + bool GSRendererHW::CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_tex) { // Master enable. diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.h b/pcsx2/GS/Renderers/HW/GSRendererHW.h index df9bc9d5987118..14122e3c01ac8d 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.h +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.h @@ -127,6 +127,8 @@ class GSRendererHW : public GSRenderer void SwSpriteRender(); bool CanUseSwSpriteRender(); + bool PossibleCLUTDraw(); + bool PossibleCLUTDrawAggressive(); bool CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_tex); bool SwPrimRender(); diff --git a/pcsx2/GameDatabase.cpp b/pcsx2/GameDatabase.cpp index e5c4d4ac78105c..758cd9e5f4154c 100644 --- a/pcsx2/GameDatabase.cpp +++ b/pcsx2/GameDatabase.cpp @@ -290,6 +290,7 @@ static const char* s_gs_hw_fix_names[] = { "texturePreloading", "deinterlace", "cpuSpriteRenderBW", + "cpuCLUTRender", "gpuPaletteConversion", }; static_assert(std::size(s_gs_hw_fix_names) == static_cast(GameDatabaseSchema::GSHWFixId::Count), "HW fix name lookup is correct size"); @@ -499,6 +500,9 @@ bool GameDatabaseSchema::GameEntry::configMatchesHWFix(const Pcsx2Config::GSOpti case GSHWFixId::CPUSpriteRenderBW: return (config.UserHacks_CPUSpriteRenderBW == value); + case GSHWFixId::CPUCLUTRender: + return (config.UserHacks_CPUCLUTRender == value); + case GSHWFixId::GPUPaletteConversion: return (config.GPUPaletteConversion == ((value > 1) ? 
(config.TexturePreloading == TexturePreloadingLevel::Full) : (value != 0))); @@ -642,6 +646,9 @@ u32 GameDatabaseSchema::GameEntry::applyGSHardwareFixes(Pcsx2Config::GSOptions& config.UserHacks_CPUSpriteRenderBW = value; break; + case GSHWFixId::CPUCLUTRender: + config.UserHacks_CPUCLUTRender = value; + break; case GSHWFixId::GPUPaletteConversion: { diff --git a/pcsx2/GameDatabase.h b/pcsx2/GameDatabase.h index 25d947b72cc3f9..8e9e0cd0b67434 100644 --- a/pcsx2/GameDatabase.h +++ b/pcsx2/GameDatabase.h @@ -82,6 +82,7 @@ namespace GameDatabaseSchema TexturePreloading, Deinterlace, CPUSpriteRenderBW, + CPUCLUTRender, GPUPaletteConversion, Count diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp index df4a1493dc7e48..39b547f22c10b7 100644 --- a/pcsx2/Pcsx2Config.cpp +++ b/pcsx2/Pcsx2Config.cpp @@ -423,6 +423,7 @@ bool Pcsx2Config::GSOptions::OptionsAreEqual(const GSOptions& right) const OpEqu(UserHacks_TCOffsetX) && OpEqu(UserHacks_TCOffsetY) && OpEqu(UserHacks_CPUSpriteRenderBW) && + OpEqu(UserHacks_CPUCLUTRender) && OpEqu(OverrideTextureBarriers) && OpEqu(OverrideGeometryShaders) && @@ -616,6 +617,7 @@ void Pcsx2Config::GSOptions::ReloadIniSettings() GSSettingIntEx(UserHacks_TCOffsetX, "UserHacks_TCOffsetX"); GSSettingIntEx(UserHacks_TCOffsetY, "UserHacks_TCOffsetY"); GSSettingIntEx(UserHacks_CPUSpriteRenderBW, "UserHacks_CPUSpriteRenderBW"); + GSSettingIntEx(UserHacks_CPUCLUTRender, "UserHacks_CPUCLUTRender"); GSSettingIntEnumEx(TriFilter, "TriFilter"); GSSettingIntEx(OverrideTextureBarriers, "OverrideTextureBarriers"); GSSettingIntEx(OverrideGeometryShaders, "OverrideGeometryShaders"); @@ -663,6 +665,7 @@ void Pcsx2Config::GSOptions::MaskUserHacks() UserHacks_TCOffsetX = 0; UserHacks_TCOffsetY = 0; UserHacks_CPUSpriteRenderBW = 0; + UserHacks_CPUCLUTRender = 0; SkipDrawStart = 0; SkipDrawEnd = 0; }