Skip to content

Commit

Permalink
GS: Clean up some state/clut things, more in depth clut overwrite check.
Browse files Browse the repository at this point in the history
  • Loading branch information
refractionpcsx2 committed Oct 22, 2022
1 parent 7aa05c0 commit b553ed1
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 49 deletions.
28 changes: 13 additions & 15 deletions pcsx2/GS/GSClut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,12 +103,17 @@ GSClut::~GSClut()
vmfree(m_clut, CLUT_ALLOC_SIZE);
}

void GSClut::Invalidate()
bool GSClut::IsInvalid()
{
m_write.dirty = true;
return m_write.dirty;
}

void GSClut::InvalidateRange(u32 start_block, u32 end_block)
// Returns the CBP field of the TEX0 register value stored in the CLUT write state (m_write).
u32 GSClut::GetCLUTCBP()
{
return m_write.TEX0.CBP;
}

bool GSClut::InvalidateRange(u32 start_block, u32 end_block)
{
int blocks = 4;

Expand All @@ -122,15 +127,8 @@ void GSClut::InvalidateRange(u32 start_block, u32 end_block)
{
m_write.dirty = true;
}
}

// Check the whole page, if the CLUT is slightly offset from a page boundary it could miss it.
void GSClut::Invalidate(u32 block)
{
if (!((block ^ m_write.TEX0.CBP) & ~0x1F))
{
m_write.dirty = true;
}
#
return m_write.dirty;
}

bool GSClut::WriteTest(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
Expand Down Expand Up @@ -775,7 +773,7 @@ bool GSClut::WriteState::IsDirty(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TE

bool is_dirty = dirty;

if (((this->TEX0.U64 ^ TEX0.U64) & mask) || (GSLocalMemory::m_psm[this->TEX0.PSM].bpp != GSLocalMemory::m_psm[TEX0.PSM].bpp))
if (((this->TEX0.U64 ^ TEX0.U64) & mask) || (GSLocalMemory::m_psm[this->TEX0.PSM].pal != GSLocalMemory::m_psm[TEX0.PSM].pal))
is_dirty |= true;
else if (TEX0.CSM == 1 && (TEXCLUT.U32[0] ^ this->TEXCLUT.U32[0]))
is_dirty |= true;
Expand All @@ -795,7 +793,7 @@ bool GSClut::ReadState::IsDirty(const GIFRegTEX0& TEX0)

bool is_dirty = dirty;

if (((this->TEX0.U64 ^ TEX0.U64) & mask) || (GSLocalMemory::m_psm[this->TEX0.PSM].bpp != GSLocalMemory::m_psm[TEX0.PSM].bpp))
if (((this->TEX0.U64 ^ TEX0.U64) & mask) || (GSLocalMemory::m_psm[this->TEX0.PSM].pal != GSLocalMemory::m_psm[TEX0.PSM].pal))
is_dirty |= true;

if (!is_dirty)
Expand All @@ -814,7 +812,7 @@ bool GSClut::ReadState::IsDirty(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)

bool is_dirty = dirty;

if (((this->TEX0.U64 ^ TEX0.U64) & tex0_mask) || (GSLocalMemory::m_psm[this->TEX0.PSM].bpp != GSLocalMemory::m_psm[TEX0.PSM].bpp))
if (((this->TEX0.U64 ^ TEX0.U64) & tex0_mask) || (GSLocalMemory::m_psm[this->TEX0.PSM].pal != GSLocalMemory::m_psm[TEX0.PSM].pal))
is_dirty |= true;
else // Just to optimise the checks.
{
Expand Down
6 changes: 3 additions & 3 deletions pcsx2/GS/GSClut.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,9 @@ class alignas(32) GSClut : public GSAlignedClass<32>
GSClut(GSLocalMemory* mem);
virtual ~GSClut();

void Invalidate();
void Invalidate(u32 block);
void InvalidateRange(u32 start_block, u32 end_block);
bool InvalidateRange(u32 start_block, u32 end_block);
bool IsInvalid();
u32 GetCLUTCBP();
bool WriteTest(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
void Write(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
//void Read(const GIFRegTEX0& TEX0);
Expand Down
76 changes: 45 additions & 31 deletions pcsx2/GS/GSState.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -777,16 +777,8 @@ __inline void GSState::CheckFlushes()
if (m_dirty_gs_regs && m_index.tail > 0)
{
if (TestDrawChanged())
{
Flush(GSFlushReason::CONTEXTCHANGE);
}
}
if ((m_context->FRAME.FBMSK & GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk) != GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk)
m_mem.m_clut.Invalidate(m_context->FRAME.Block());

// Hey, why not check? I mean devs have done crazier things..
if(!m_context->ZBUF.ZMSK)
m_mem.m_clut.Invalidate(m_context->ZBUF.Block());
}

void GSState::GIFPackedRegHandlerNull(const GIFPackedReg* RESTRICT r)
Expand Down Expand Up @@ -1076,8 +1068,6 @@ void GSState::ApplyTEX0(GIFRegTEX0& TEX0)
// even if TEX0 did not change, a new palette may have been uploaded and will overwrite the currently queued for drawing
const bool wt = m_mem.m_clut.WriteTest(TEX0, m_env.TEXCLUT);

// clut loading already covered with WriteTest, for drawing only have to check CPSM and CSA (MGS3 intro skybox would be drawn piece by piece without this)

if (wt)
Flush(GSFlushReason::CLUTCHANGE);

Expand Down Expand Up @@ -1148,8 +1138,6 @@ void GSState::GIFRegHandlerTEX0(const GIFReg* RESTRICT r)
GL_REG("TEX0_%d = 0x%x_%x", i, r->U32[1], r->U32[0]);

GIFRegTEX0 TEX0 = r->TEX0;
GIFRegMIPTBP1 temp_MIPTBP1;
bool MTBAReloaded = false;
// Max allowed MTBA size for 32bit swizzled textures (including 8H 4HL etc) is 512, 16bit and normal 8/4bit formats can be 1024
const u32 maxTex = (GSLocalMemory::m_psm[TEX0.PSM].bpp < 32) ? 10 : 9;

Expand All @@ -1174,6 +1162,7 @@ void GSState::GIFRegHandlerTEX0(const GIFReg* RESTRICT r)
// Format must be a color, Z formats do not trigger MTBA (but are valid for Mipmapping)
if (m_env.CTXT[i].TEX1.MTBA && TEX0.TW >= 5 && TEX0.TW <= maxTex && (TEX0.PSM & 0x30) != 0x30)
{
GIFRegMIPTBP1& mip_tbp1 = m_env.CTXT[i].MIPTBP1;
// NOTE 1: TEX1.MXL must not be automatically set to 3 here and it has no effect on MTBA.
// NOTE 2: Mipmap levels are packed with a minimum distance between them of 1 block, even down at 4bit textures under 16x16.
// NOTE 3: Everything is derived from the width of the texture, TBW and TH are completely ignored (useful for handling non-rectangular ones)
Expand All @@ -1190,39 +1179,32 @@ void GSState::GIFRegHandlerTEX0(const GIFReg* RESTRICT r)
bw = std::max<u32>(bw >> 1, 1);
tex_size = std::max<u32>(tex_size >> 2, 1);

temp_MIPTBP1.TBP1 = bp;
temp_MIPTBP1.TBW1 = bw;
mip_tbp1.TBP1 = bp;
mip_tbp1.TBW1 = bw;

bp += tex_size;
bw = std::max<u32>(bw >> 1, 1);
tex_size = std::max<u32>(tex_size >> 2, 1);

temp_MIPTBP1.TBP2 = bp;
temp_MIPTBP1.TBW2 = bw;
mip_tbp1.TBP2 = bp;
mip_tbp1.TBW2 = bw;

bp += tex_size;
bw = std::max<u32>(bw >> 1, 1);

temp_MIPTBP1.TBP3 = bp;
temp_MIPTBP1.TBW3 = bw;

MTBAReloaded = true;
}

ApplyTEX0<i>(TEX0);

if (MTBAReloaded)
{
m_env.CTXT[i].MIPTBP1 = temp_MIPTBP1;
mip_tbp1.TBP3 = bp;
mip_tbp1.TBW3 = bw;

if (i == m_prev_env.PRIM.CTXT)
{
if (m_prev_env.CTXT[i].MIPTBP1.U64 ^ m_env.CTXT[i].MIPTBP1.U64)
if (m_prev_env.CTXT[i].MIPTBP1.U64 ^ mip_tbp1.U64)
m_dirty_gs_regs |= (1 << DIRTY_REG_MIPTBP1);
else
m_dirty_gs_regs &= ~(1 << DIRTY_REG_MIPTBP1);
}
}

ApplyTEX0<i>(TEX0);
}

template <int i>
Expand Down Expand Up @@ -2957,19 +2939,49 @@ GSState::PRIM_OVERLAP GSState::PrimitiveOverlap()

__forceinline bool GSState::IsAutoFlushDraw()
{
if (!PRIM->TME)
return false;

const u32 frame_mask = GSLocalMemory::m_psm[m_context->TEX0.PSM].fmsk;
const bool frame_hit = (m_context->FRAME.Block() == m_context->TEX0.TBP0) && !(m_context->TEST.ATE && m_context->TEST.ATST == 0 && m_context->TEST.AFAIL == 2) && ((m_context->FRAME.FBMSK & frame_mask) != frame_mask);
const bool frame_hit = ((m_context->FRAME.Block() == m_context->TEX0.TBP0) || (m_context->FRAME.Block() == m_context->TEX0.CBP)) && !(m_context->TEST.ATE && m_context->TEST.ATST == 0 && m_context->TEST.AFAIL == 2) && ((m_context->FRAME.FBMSK & frame_mask) != frame_mask);
// There's a strange behaviour we need to test on a PS2 here, if the FRAME is a Z format, like Powerdrome something swaps over, and it seems Alpha Fail of "FB Only" writes to the Z.. it's odd.
const bool zbuf_hit = (m_context->ZBUF.Block() == m_context->TEX0.TBP0) && !(m_context->TEST.ATE && m_context->TEST.ATST == 0 && m_context->TEST.AFAIL != 2) && !m_context->ZBUF.ZMSK;
const u32 frame_z_psm = frame_hit ? m_context->FRAME.PSM : m_context->ZBUF.PSM;
const u32 frame_z_bp = frame_hit ? m_context->FRAME.Block() : m_context->ZBUF.Block();

if (PRIM->TME && (frame_hit || zbuf_hit) && GSUtil::HasSharedBits(frame_z_bp, frame_z_psm, m_context->TEX0.TBP0, m_context->TEX0.PSM))
if ((frame_hit || zbuf_hit) && GSUtil::HasSharedBits(frame_z_bp, frame_z_psm, m_context->TEX0.TBP0, m_context->TEX0.PSM))
return true;

return false;
}

// Checks whether the render targets of the current context could overlap the CLUT
// and, if so, marks the CLUT write state dirty via GSClut::InvalidateRange.
//
// The range tested for each target runs from the target's base block to the end
// block returned by GSLocalMemory::GetEndBlock for the most recently kicked vertex
// position (m_v), relative to the context's XYOFFSET.
// Does nothing if the CLUT is already flagged dirty.
__forceinline void GSState::CLUTAutoFlush()
{
	// Already invalid, a further range check cannot change anything.
	if (m_mem.m_clut.IsInvalid())
		return;

	const GSVector2i offset = GSVector2i(m_context->XYOFFSET.OFX, m_context->XYOFFSET.OFY);
	GSVector4i tex_coord;
	// Remove the drawing offset, then drop the low 4 bits of the vertex position
	// (presumably the fractional part of the fixed point coordinate - confirm against the GS manual).
	tex_coord.x = (static_cast<int>(m_v.XYZ.X) - offset.x) >> 4;
	tex_coord.y = (static_cast<int>(m_v.XYZ.Y) - offset.y) >> 4;

	// Quick checks first they (might) be enough as most CLUT draws will be from 0,0
	// Only consider the frame buffer when at least one bit of the format's mask is writable.
	if ((m_context->FRAME.FBMSK & GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk) != GSLocalMemory::m_psm[m_context->FRAME.PSM].fmsk)
	{
		const int end_bp = GSLocalMemory::GetEndBlock(m_context->FRAME.Block(), m_context->FRAME.FBW, tex_coord.x, tex_coord.y, m_context->FRAME.PSM);
		if (m_mem.m_clut.InvalidateRange(m_context->FRAME.Block(), end_bp))
			return;
	}

	// Hey, why not check? I mean devs have done crazier things..
	if (!m_context->ZBUF.ZMSK)
	{
		// The end block must be measured from the Z buffer's own base so that it is
		// consistent with the start block passed to InvalidateRange below.
		// FRAME.FBW is still used for the width, as in the frame buffer check above.
		const int end_bp = GSLocalMemory::GetEndBlock(m_context->ZBUF.Block(), m_context->FRAME.FBW, tex_coord.x, tex_coord.y, m_context->ZBUF.PSM);
		m_mem.m_clut.InvalidateRange(m_context->ZBUF.Block(), end_bp);
	}
}

__forceinline void GSState::HandleAutoFlush()
{
// Kind of a cheat, making the assumption that 2 consecutive fan/strip triangles won't overlap each other (*should* be safe)
Expand Down Expand Up @@ -3190,7 +3202,9 @@ __forceinline void GSState::VertexKick(u32 skip)

ASSERT(m_vertex.tail < m_vertex.maxcount + 3);

if (auto_flush && m_index.tail > 0 && ((m_vertex.tail + 1) - m_vertex.head) >= n)
CLUTAutoFlush();

if (auto_flush && skip == 0 && m_index.tail > 0 && ((m_vertex.tail + 1) - m_vertex.head) >= n)
{
HandleAutoFlush();
}
Expand Down
1 change: 1 addition & 0 deletions pcsx2/GS/GSState.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ class GSState : public GSAlignedClass<32>
void GrowVertexBuffer();
bool IsAutoFlushDraw();
void HandleAutoFlush();
void CLUTAutoFlush();

template <u32 prim, bool auto_flush, bool index_swap>
void VertexKick(u32 skip);
Expand Down

0 comments on commit b553ed1

Please sign in to comment.