Skip to content

Commit

Permalink
fix vop2 decode for vop3b instructions
Browse files Browse the repository at this point in the history
  • Loading branch information
Inori committed Jun 7, 2022
1 parent 1a695e2 commit 7741573
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 14 deletions.
6 changes: 3 additions & 3 deletions GPCS4/Graphics/Gcn/ControlFlowGraph/GcnDivergentFlow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,9 @@ namespace sce::gcn
}
}

auto tokenIf = m_factory.createIf(GcnConditionOp::Divergence);
auto tokenElse = m_factory.createElse(tokenIf);
auto tokenEnd = m_factory.createIfEnd(tokenIf, tokenElse);
auto tokenIf = m_factory.createIf(GcnConditionOp::Divergence);
auto tokenElse = m_factory.createElse(tokenIf);
auto tokenEnd = m_factory.createIfEnd(tokenIf, tokenElse);

if (!inactiveCode.insList.empty())
{
Expand Down
33 changes: 23 additions & 10 deletions GPCS4/Graphics/Gcn/GcnCompiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ namespace sce::gcn
// variables used for control
// flow first.
compileGlobalVariable(tokens);

// Compile each token left
for (const auto& token : tokens)
{
compileToken(*token);
Expand Down Expand Up @@ -304,11 +304,11 @@ namespace sce::gcn
// so we only set EXEC bit against invocation id.
if (m_moduleInfo.options.separateSubgroup)
{
if (m_programInfo.type() == GcnProgramType::ComputeShader)
{
result = emitComputeDivergence();
}
else
//if (m_programInfo.type() == GcnProgramType::ComputeShader)
//{
// result = emitComputeDivergence();
//}
//else
{
auto mask = emitCommonSystemValueLoad(
GcnSystemValue::SubgroupEqMask, GcnRegMask::select(0));
Expand Down Expand Up @@ -1578,6 +1578,12 @@ namespace sce::gcn
m_module.setDebugName(m_state.scc.id, "scc");
}

// Placeholder for compute-shader specific EXEC initialization.
// NOTE(review): the body is currently empty, so eqMask is unused and
// calling this function has no effect; the comment inside only
// describes the intended behavior, which is not implemented here.
void GcnCompiler::emitInitCsExec(const GcnRegisterValue& eqMask)
{
// For compute shader, we initialize low 32-bits of exec
// with even subgroup and high 32 bits with odd.

}

void GcnCompiler::emitInitStateRegister()
{
Expand All @@ -1586,15 +1592,23 @@ namespace sce::gcn
auto eqMask = emitCommonSystemValueLoad(
GcnSystemValue::SubgroupEqMask, GcnRegMask::firstN(2));

//GcnRegisterValue ballot = {};
//ballot.type.ctype = GcnScalarType::Uint32;
//ballot.type.ccount = 4;
//ballot.id = m_module.opGroupNonUniformBallot(
// getVectorTypeId(ballot.type),
// m_module.constu32(spv::ScopeSubgroup),
// m_module.constBool(true));

// We cheat the shader as if the CU only provides one single thread,
// so we only set EXEC bit against invocation id.
if (m_moduleInfo.options.separateSubgroup)
{
auto mask = emitRegisterExtract(eqMask, GcnRegMask::select(0));
auto exec = emitRegisterExtract(eqMask, GcnRegMask::select(0));

// Set high 32 bits to zero,
// cheat the shader that the high 32 lanes are inactive.
m_state.exec.init(mask.id, m_module.constu32(0));
m_state.exec.init(exec.id, m_module.constu32(0));
}
else
{
Expand Down Expand Up @@ -3931,5 +3945,4 @@ namespace sce::gcn
}



} // namespace sce::gcn
2 changes: 2 additions & 0 deletions GPCS4/Graphics/Gcn/GcnCompiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,8 @@ namespace sce::gcn

////////////////////////////
// Input/output preparation
void emitInitCsExec(
const GcnRegisterValue& eqMask);
void emitInitStateRegister();
void emitInputSetup();
void emitFetchInput();
Expand Down
10 changes: 10 additions & 0 deletions GPCS4/Graphics/Gcn/GcnCompilerVectorALU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,14 @@ namespace sce::gcn
result.low.type.ccount = 1;
result.high.type = result.low.type;

//GcnRegisterValue ballot = {};
//ballot.type.ctype = GcnScalarType::Uint32;
//ballot.type.ccount = 4;
//ballot.id = m_module.opGroupNonUniformBallot(
// getVectorTypeId(ballot.type),
// m_module.constu32(spv::ScopeSubgroup),
// m_module.constBool(true));

// Because we only set one bit against invocation id upon shader launch,
// we'll only operate that bit for compare instructions.
// This way we can use native GPU lane slot to store vgpr values,
Expand All @@ -502,6 +510,8 @@ namespace sce::gcn
auto exec = m_state.exec.emitLoad(GcnRegMask::select(0));

result.low.id = m_module.opBitwiseAnd(typeId, sValue, exec.low.id);

//result.low = emitRegisterExtract(ballot, GcnRegMask::select(0));

// Always set high 32-bits of the compare result to zero,
// which means the high 32 lanes are inactive,
Expand Down
9 changes: 8 additions & 1 deletion GPCS4/Graphics/Gcn/GcnDecoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,8 @@ namespace sce::gcn
m_instruction.dstCount = 1;

GcnOpcodeVOP2 vop2Op = static_cast<GcnOpcodeVOP2>(op);
if (vop2Op == GcnOpcodeVOP2::V_READLANE_B32 || vop2Op == GcnOpcodeVOP2::V_WRITELANE_B32)
if (vop2Op == GcnOpcodeVOP2::V_READLANE_B32 ||
vop2Op == GcnOpcodeVOP2::V_WRITELANE_B32)
{
// vsrc1 is scalar for lane instructions
m_instruction.src[1].field = getOperandField(vsrc1);
Expand All @@ -663,6 +664,12 @@ namespace sce::gcn
m_instruction.dst[1].type = GcnScalarType::Uint32;
m_instruction.dst[1].code = vdst;
}
else if (isVop3BEncoding(m_instruction.opcode))
{
m_instruction.dst[1].field = GcnOperandField::VccLo;
m_instruction.dst[1].type = GcnScalarType::Uint64;
m_instruction.dst[1].code = static_cast<uint32_t>(GcnOperandField::VccLo);
}
}

void GcnDecodeContext::decodeInstructionSMRD(uint32_t hexInstruction)
Expand Down

0 comments on commit 7741573

Please sign in to comment.