Skip to content

Commit

Permalink
Merge branch 'stenzek:master' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
yeager authored Dec 27, 2024
2 parents 51df7a0 + 69ed6e5 commit 07d33c3
Show file tree
Hide file tree
Showing 10 changed files with 1,389 additions and 1,405 deletions.
72 changes: 18 additions & 54 deletions src/core/cpu_code_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,6 @@ static constexpr u32 RECOMPILE_FRAMES_FOR_INTERPRETER_FALLBACK = 15;
static constexpr u32 INVALIDATE_COUNT_FOR_MANUAL_PROTECTION = 4;
static constexpr u32 INVALIDATE_FRAMES_FOR_MANUAL_PROTECTION = 60;

static CodeLUT DecodeCodeLUTPointer(u32 slot, CodeLUT ptr);
static CodeLUT EncodeCodeLUTPointer(u32 slot, CodeLUT ptr);
static CodeLUT OffsetCodeLUTPointer(CodeLUT fake_ptr, u32 pc);

static void AllocateLUTs();
static void DeallocateLUTs();
static void ResetCodeLUT();
Expand All @@ -67,8 +63,8 @@ static Block* LookupBlock(u32 pc);
static Block* CreateBlock(u32 pc, const BlockInstructionList& instructions, const BlockMetadata& metadata);
static bool IsBlockCodeCurrent(const Block* block);
static bool RevalidateBlock(Block* block);
PageProtectionMode GetProtectionModeForPC(u32 pc);
PageProtectionMode GetProtectionModeForBlock(const Block* block);
static PageProtectionMode GetProtectionModeForPC(u32 pc);
static PageProtectionMode GetProtectionModeForBlock(const Block* block);
static bool ReadBlockInstructions(u32 start_pc, BlockInstructionList* instructions, BlockMetadata* metadata);
static void FillBlockRegInfo(Block* block);
static void CopyRegInfo(InstructionInfo* dst, const InstructionInfo* src);
Expand Down Expand Up @@ -277,31 +273,6 @@ static constexpr u32 GetLUTSlotCount(bool include_unreachable)
}
} // namespace CPU::CodeCache

// Converts a slot-biased ("encoded") LUT pointer back into the pointer it was encoded from.
// The bias added back is (slot << 17) bytes when pointers are 8 bytes wide and (slot << 16)
// bytes otherwise, i.e. the exact amount EncodeCodeLUTPointer() subtracts for the same slot.
CPU::CodeCache::CodeLUT CPU::CodeCache::DecodeCodeLUTPointer(u32 slot, CodeLUT ptr)
{
  u8* const encoded_bytes = reinterpret_cast<u8*>(ptr);

  if constexpr (sizeof(void*) == 8)
  {
    // Widen before shifting so the bias is computed in 64 bits.
    const u64 slot_bias = static_cast<u64>(slot) << 17;
    return reinterpret_cast<CodeLUT>(encoded_bytes + slot_bias);
  }
  else
  {
    const u32 slot_bias = slot << 16;
    return reinterpret_cast<CodeLUT>(encoded_bytes + slot_bias);
  }
}

// Produces the slot-biased ("encoded") form of a LUT pointer by moving it backwards by the
// slot's byte offset: (slot << 17) bytes when pointers are 8 bytes wide, (slot << 16) bytes
// otherwise. DecodeCodeLUTPointer() adds the same amount back.
CPU::CodeCache::CodeLUT CPU::CodeCache::EncodeCodeLUTPointer(u32 slot, CodeLUT ptr)
{
  u8* const table_bytes = reinterpret_cast<u8*>(ptr);

  if constexpr (sizeof(void*) == 8)
  {
    // Widen before shifting so the bias is computed in 64 bits.
    const u64 slot_bias = static_cast<u64>(slot) << 17;
    return reinterpret_cast<CodeLUT>(table_bytes - slot_bias);
  }
  else
  {
    const u32 slot_bias = slot << 16;
    return reinterpret_cast<CodeLUT>(table_bytes - slot_bias);
  }
}

// Returns the address of the LUT entry for `pc`, given an encoded (slot-biased) table pointer.
// The entry is located by a raw byte offset from the encoded pointer: pc * 2 bytes when pointers
// are 8 bytes wide, pc bytes otherwise.
// NOTE(review): presumably pc is 4-byte aligned so that this lands on a whole entry -- confirm
// against callers.
CPU::CodeCache::CodeLUT CPU::CodeCache::OffsetCodeLUTPointer(CodeLUT fake_ptr, u32 pc)
{
  u8* const encoded_bytes = reinterpret_cast<u8*>(fake_ptr);

  if constexpr (sizeof(void*) == 8)
  {
    // Widen before shifting so the top bit of pc is not lost.
    const u64 entry_offset = static_cast<u64>(pc) << 1;
    return reinterpret_cast<const void**>(encoded_bytes + entry_offset);
  }
  else
  {
    return reinterpret_cast<const void**>(encoded_bytes + pc);
  }
}

void CPU::CodeCache::AllocateLUTs()
{
constexpr u32 num_code_slots = GetLUTSlotCount(true);
Expand All @@ -323,9 +294,11 @@ void CPU::CodeCache::AllocateLUTs()
// Mark everything as unreachable to begin with.
for (u32 i = 0; i < LUT_TABLE_COUNT; i++)
{
g_code_lut[i] = EncodeCodeLUTPointer(i, code_table_ptr);
g_code_lut[i] = code_table_ptr;
s_block_lut[i] = nullptr;
}

// Exclude unreachable.
code_table_ptr += LUT_TABLE_SIZE;

// Allocate ranges.
Expand All @@ -337,7 +310,7 @@ void CPU::CodeCache::AllocateLUTs()
{
const u32 slot = start_slot + i;

g_code_lut[slot] = EncodeCodeLUTPointer(slot, code_table_ptr);
g_code_lut[slot] = code_table_ptr;
code_table_ptr += LUT_TABLE_SIZE;

s_block_lut[slot] = block_table_ptr;
Expand All @@ -357,15 +330,13 @@ void CPU::CodeCache::DeallocateLUTs()

void CPU::CodeCache::ResetCodeLUT()
{
if (!s_lut_code_pointers)
return;

// Make the unreachable table jump to the invalid code callback.
MemsetPtrs(s_lut_code_pointers.get(), g_interpret_block, LUT_TABLE_COUNT);

for (u32 i = 0; i < LUT_TABLE_COUNT; i++)
{
CodeLUT ptr = DecodeCodeLUTPointer(i, g_code_lut[i]);
// Don't overwrite anything bound to unreachable.
CodeLUT ptr = g_code_lut[i];
if (ptr == s_lut_code_pointers.get())
continue;

Expand All @@ -375,18 +346,10 @@ void CPU::CodeCache::ResetCodeLUT()

void CPU::CodeCache::SetCodeLUT(u32 pc, const void* function)
{
if (!s_lut_code_pointers)
return;

const u32 table = pc >> LUT_TABLE_SHIFT;
CodeLUT encoded_ptr = g_code_lut[table];

#ifdef _DEBUG
const CodeLUT table_ptr = DecodeCodeLUTPointer(table, encoded_ptr);
DebugAssert(table_ptr != nullptr && table_ptr != s_lut_code_pointers.get());
#endif

*OffsetCodeLUTPointer(encoded_ptr, pc) = function;
const u32 idx = (pc & 0xFFFF) >> 2;
DebugAssert(g_code_lut[table] != s_lut_code_pointers.get());
g_code_lut[table][idx] = function;
}

CPU::CodeCache::Block* CPU::CodeCache::LookupBlock(u32 pc)
Expand Down Expand Up @@ -948,7 +911,6 @@ bool CPU::CodeCache::ReadBlockInstructions(u32 start_pc, BlockInstructionList* i
InstructionInfo info;
std::memset(&info, 0, sizeof(info));

info.pc = pc;
info.is_branch_delay_slot = is_branch_delay_slot;
info.is_load_delay_slot = is_load_delay_slot;
info.is_branch_instruction = IsBranchInstruction(instruction);
Expand Down Expand Up @@ -985,18 +947,18 @@ bool CPU::CodeCache::ReadBlockInstructions(u32 start_pc, BlockInstructionList* i
const BlockInstructionInfoPair& prev = instructions->back();
if (!prev.second.is_unconditional_branch_instruction || !prev.second.is_direct_branch_instruction)
{
WARNING_LOG("Conditional or indirect branch delay slot at {:08X}, skipping block", info.pc);
WARNING_LOG("Conditional or indirect branch delay slot at {:08X}, skipping block", pc);
return false;
}
if (!IsDirectBranchInstruction(instruction))
{
WARNING_LOG("Indirect branch in delay slot at {:08X}, skipping block", info.pc);
WARNING_LOG("Indirect branch in delay slot at {:08X}, skipping block", pc);
return false;
}

// we _could_ fetch the delay slot from the first branch's target, but it's probably in a different
// page, and that's an invalidation nightmare. so just fallback to the int, this is very rare anyway.
WARNING_LOG("Direct branch in delay slot at {:08X}, skipping block", info.pc);
WARNING_LOG("Direct branch in delay slot at {:08X}, skipping block", pc);
return false;
}

Expand Down Expand Up @@ -1029,14 +991,16 @@ bool CPU::CodeCache::ReadBlockInstructions(u32 start_pc, BlockInstructionList* i

#if defined(_DEBUG) || defined(_DEVEL)
SmallString disasm;
u32 disasm_pc = start_pc;
DEBUG_LOG("Block at 0x{:08X}", start_pc);
DEBUG_LOG(" Uncached fetch ticks: {}", metadata->uncached_fetch_ticks);
DEBUG_LOG(" ICache line count: {}", metadata->icache_line_count);
for (const auto& cbi : *instructions)
{
CPU::DisassembleInstruction(&disasm, cbi.second.pc, cbi.first.bits);
CPU::DisassembleInstruction(&disasm, disasm_pc, cbi.first.bits);
DEBUG_LOG("[{} {} 0x{:08X}] {:08X} {}", cbi.second.is_branch_delay_slot ? "BD" : " ",
cbi.second.is_load_delay_slot ? "LD" : " ", cbi.second.pc, cbi.first.bits, disasm);
cbi.second.is_load_delay_slot ? "LD" : " ", disasm_pc, cbi.first.bits, disasm);
disasm_pc += sizeof(Instruction);
}
#endif

Expand Down
2 changes: 0 additions & 2 deletions src/core/cpu_code_cache_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,6 @@ enum RegInfoFlags : u8

struct InstructionInfo
{
u32 pc; // TODO: Remove this, old recs still depend on it.

bool is_branch_instruction : 1;
bool is_direct_branch_instruction : 1;
bool is_unconditional_branch_instruction : 1;
Expand Down
19 changes: 14 additions & 5 deletions src/core/cpu_core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2549,7 +2549,7 @@ void CPU::CodeCache::InterpretCachedBlock(const Block* block)

// now executing the instruction we previously fetched
g_state.current_instruction.bits = instruction->bits;
g_state.current_instruction_pc = info->pc;
g_state.current_instruction_pc = g_state.pc;
g_state.current_instruction_in_branch_delay_slot = info->is_branch_delay_slot; // TODO: let int set it instead
g_state.current_instruction_was_branch_taken = g_state.branch_was_taken;
g_state.branch_was_taken = false;
Expand Down Expand Up @@ -2706,10 +2706,13 @@ ALWAYS_INLINE_RELEASE bool CPU::DoInstructionRead(PhysicalMemoryAddress address,

return true;
}
else
else [[unlikely]]
{
if (raise_exceptions)
CPU::RaiseException(address, Cop0Registers::CAUSE::MakeValueForException(Exception::IBE, false, false, 0));
{
g_state.cop0_regs.BadVaddr = address;
RaiseException(Cop0Registers::CAUSE::MakeValueForException(Exception::IBE, false, false, 0), address);
}

std::memset(data, 0, sizeof(u32) * word_count);
return false;
Expand Down Expand Up @@ -2871,7 +2874,13 @@ ALWAYS_INLINE_RELEASE bool CPU::FetchInstruction()

bool CPU::FetchInstructionForInterpreterFallback()
{
DebugAssert(Common::IsAlignedPow2(g_state.npc, 4));
if (!Common::IsAlignedPow2(g_state.npc, 4)) [[unlikely]]
{
// The BadVaddr and EPC must be set to the fetching address, not the instruction about to execute.
g_state.cop0_regs.BadVaddr = g_state.npc;
RaiseException(Cop0Registers::CAUSE::MakeValueForException(Exception::AdEL, false, false, 0), g_state.npc);
return false;
}

const PhysicalMemoryAddress address = g_state.npc;
switch (address >> 29)
Expand All @@ -2881,7 +2890,7 @@ bool CPU::FetchInstructionForInterpreterFallback()
case 0x05: // KSEG1 - physical memory uncached
{
// We don't use the icache when doing interpreter fallbacks, because it's probably stale.
if (!DoInstructionRead<false, false, 1, true>(address, &g_state.next_instruction.bits))
if (!DoInstructionRead<false, false, 1, true>(address, &g_state.next_instruction.bits)) [[unlikely]]
return false;
}
break;
Expand Down
7 changes: 4 additions & 3 deletions src/core/cpu_recompiler_arm32.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,11 +290,12 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
armAsm->ldr(RARG1, PTR(&g_state.pc));
armMoveAddressToReg(armAsm, RARG3, g_code_lut.data());
armAsm->lsr(RARG2, RARG1, 16);
armAsm->ubfx(RARG1, RARG1, 2, 14);
armAsm->ldr(RARG2, MemOperand(RARG3, RARG2, LSL, 2));

// blr(x9[pc * 2]) (fast_map[pc >> 2])
armAsm->ldr(RARG1, MemOperand(RARG2, RARG1));
armAsm->blx(RARG1);
armAsm->ldr(RARG1, MemOperand(RARG2, RARG1, LSL, 2));
armAsm->bx(RARG1);
}

g_compile_or_revalidate_block = armAsm->GetCursorAddress<const void*>();
Expand Down Expand Up @@ -1024,7 +1025,7 @@ void CPU::ARM32Recompiler::Flush(u32 flags)

void CPU::ARM32Recompiler::Compile_Fallback()
{
WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", iinfo->pc, inst->bits);
WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc, inst->bits);

Flush(FLUSH_FOR_INTERPRETER);

Expand Down
6 changes: 3 additions & 3 deletions src/core/cpu_recompiler_arm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -478,12 +478,12 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
armAsm->ldr(RWARG1, PTR(&g_state.pc));
armMoveAddressToReg(armAsm, RXARG3, g_code_lut.data());
armAsm->lsr(RWARG2, RWARG1, 16);
armAsm->lsr(RWARG1, RWARG1, 2);
armAsm->ubfx(RWARG1, RWARG1, 2, 14);
armAsm->ldr(RXARG2, MemOperand(RXARG3, RXARG2, LSL, 3));

// blr(x9[pc * 2]) (fast_map[pc >> 2])
armAsm->ldr(RXARG1, MemOperand(RXARG2, RXARG1, LSL, 3));
armAsm->blr(RXARG1);
armAsm->br(RXARG1);
}

g_compile_or_revalidate_block = armAsm->GetCursorAddress<const void*>();
Expand Down Expand Up @@ -1174,7 +1174,7 @@ void CPU::ARM64Recompiler::Flush(u32 flags)

void CPU::ARM64Recompiler::Compile_Fallback()
{
WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", iinfo->pc, inst->bits);
WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc, inst->bits);

Flush(FLUSH_FOR_INTERPRETER);

Expand Down
8 changes: 5 additions & 3 deletions src/core/cpu_recompiler_riscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,12 +279,14 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
rvAsm->LWU(RARG1, PTR(&g_state.pc));
rvMoveAddressToReg(rvAsm, RARG3, g_code_lut.data());
rvAsm->SRLI(RARG2, RARG1, 16);
rvAsm->SLLI(RARG1, RARG1, 1);
rvAsm->SLLI(RARG2, RARG2, 3);
rvAsm->ADD(RARG2, RARG2, RARG3);
rvAsm->LD(RARG2, 0, RARG2);
rvAsm->SLLI(RARG1, RARG1, 48); // idx = (pc & 0xFFFF) >> 2
rvAsm->SRLI(RARG1, RARG1, 50);
rvAsm->SLLI(RARG1, RARG1, 3);

// blr(x9[pc * 2]) (fast_map[pc >> 2])
// blr(x9[pc * 2]) (fast_map[idx])
rvAsm->ADD(RARG1, RARG1, RARG2);
rvAsm->LD(RARG1, 0, RARG1);
rvAsm->JR(RARG1);
Expand Down Expand Up @@ -996,7 +998,7 @@ void CPU::RISCV64Recompiler::Flush(u32 flags)

void CPU::RISCV64Recompiler::Compile_Fallback()
{
WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", iinfo->pc, inst->bits);
WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc, inst->bits);

Flush(FLUSH_FOR_INTERPRETER);

Expand Down
5 changes: 3 additions & 2 deletions src/core/cpu_recompiler_x64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,9 @@ u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
cg->mov(RWARG1, cg->dword[PTR(&g_state.pc)]);
cg->lea(RXARG2, cg->dword[PTR(g_code_lut.data())]);
cg->mov(RWARG3, RWARG1);
cg->shr(RWARG3, 16);
cg->shr(RWARG3, LUT_TABLE_SHIFT);
cg->mov(RXARG2, cg->qword[RXARG2 + RXARG3 * 8]);
cg->and_(RWARG1, (LUT_TABLE_SIZE - 1) << 2); // 0xFFFC

// call(rcx[pc * 2]) (fast_map[pc >> 2])
cg->jmp(cg->qword[RXARG2 + RXARG1 * 2]);
Expand Down Expand Up @@ -928,7 +929,7 @@ void CPU::X64Recompiler::Flush(u32 flags)

void CPU::X64Recompiler::Compile_Fallback()
{
WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", iinfo->pc, inst->bits);
WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc, inst->bits);

Flush(FLUSH_FOR_INTERPRETER);

Expand Down
30 changes: 14 additions & 16 deletions src/core/hotkeys.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -210,29 +210,13 @@ DEFINE_HOTKEY("OpenCheatsMenu", TRANSLATE_NOOP("Hotkeys", "General"), TRANSLATE_
if (!pressed && CanPause())
FullscreenUI::OpenCheatsMenu();
})
#endif

// "Screenshot" hotkey: calls System::SaveScreenshot() when the handler is invoked with
// pressed == 0 (presumably on key release -- confirm against the hotkey dispatcher).
DEFINE_HOTKEY("Screenshot", TRANSLATE_NOOP("Hotkeys", "General"), TRANSLATE_NOOP("Hotkeys", "Save Screenshot"),
              [](s32 pressed) {
                if (pressed == 0)
                {
                  System::SaveScreenshot();
                }
              })

// "RecordSingleFrameGPUDump" hotkey: calls System::StartRecordingGPUDump(nullptr, 1) when the
// handler is invoked with pressed == 0 (presumably on key release -- confirm against the
// hotkey dispatcher).
DEFINE_HOTKEY("RecordSingleFrameGPUDump", TRANSLATE_NOOP("Hotkeys", "Graphics"),
              TRANSLATE_NOOP("Hotkeys", "Record Single Frame GPU Trace"), [](s32 pressed) {
                if (pressed == 0)
                {
                  System::StartRecordingGPUDump(nullptr, 1);
                }
              })

// "RecordMultiFrameGPUDump" hotkey: a positive `pressed` value starts a GPU dump recording via
// System::StartRecordingGPUDump(nullptr, 0); any other value stops it via
// System::StopRecordingGPUDump().
DEFINE_HOTKEY("RecordMultiFrameGPUDump", TRANSLATE_NOOP("Hotkeys", "Graphics"),
              TRANSLATE_NOOP("Hotkeys", "Record Multi-Frame GPU Trace"), [](s32 pressed) {
                if (pressed <= 0)
                {
                  System::StopRecordingGPUDump();
                  return;
                }

                System::StartRecordingGPUDump(nullptr, 0);
              })

#ifndef __ANDROID__
DEFINE_HOTKEY("ToggleMediaCapture", TRANSLATE_NOOP("Hotkeys", "General"),
TRANSLATE_NOOP("Hotkeys", "Toggle Media Capture"), [](s32 pressed) {
if (!pressed)
Expand All @@ -257,6 +241,20 @@ DEFINE_HOTKEY("OpenLeaderboards", TRANSLATE_NOOP("Hotkeys", "General"),
})
#endif

// "RecordSingleFrameGPUDump" hotkey: calls System::StartRecordingGPUDump(nullptr, 1) when the
// handler is invoked with pressed == 0 (presumably on key release -- confirm against the
// hotkey dispatcher).
DEFINE_HOTKEY("RecordSingleFrameGPUDump", TRANSLATE_NOOP("Hotkeys", "Graphics"),
              TRANSLATE_NOOP("Hotkeys", "Record Single Frame GPU Trace"), [](s32 pressed) {
                if (pressed == 0)
                {
                  System::StartRecordingGPUDump(nullptr, 1);
                }
              })

// "RecordMultiFrameGPUDump" hotkey: a positive `pressed` value starts a GPU dump recording via
// System::StartRecordingGPUDump(nullptr, 0); any other value stops it via
// System::StopRecordingGPUDump().
DEFINE_HOTKEY("RecordMultiFrameGPUDump", TRANSLATE_NOOP("Hotkeys", "Graphics"),
              TRANSLATE_NOOP("Hotkeys", "Record Multi-Frame GPU Trace"), [](s32 pressed) {
                if (pressed <= 0)
                {
                  System::StopRecordingGPUDump();
                  return;
                }

                System::StartRecordingGPUDump(nullptr, 0);
              })

DEFINE_HOTKEY("Reset", TRANSLATE_NOOP("Hotkeys", "System"), TRANSLATE_NOOP("Hotkeys", "Reset System"), [](s32 pressed) {
if (!pressed)
Host::RunOnCPUThread(System::ResetSystem);
Expand Down
Loading

0 comments on commit 07d33c3

Please sign in to comment.