Skip to content

Commit

Permalink
Implements copying of dynamically sized memory
Browse files Browse the repository at this point in the history
Also adds a helper function to insert a while-loop into the generated code.

See doe300/VC4CL#81

Fixes:
TestVC4C/emulator/test_async_copy
  • Loading branch information
doe300 committed Sep 7, 2019
1 parent 707e55b commit 8b672d6
Show file tree
Hide file tree
Showing 8 changed files with 104 additions and 17 deletions.
2 changes: 2 additions & 0 deletions src/BasicBlock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,8 @@ void BasicBlock::dumpInstructions() const
[](const std::unique_ptr<intermediate::IntermediateInstruction>& instr) {
if(instr)
logging::debug() << instr->to_string() << logging::endl;
else
logging::debug() << "(null)" << logging::endl;
});
logging::debug() << "Block end ----" << logging::endl;
});
Expand Down
27 changes: 27 additions & 0 deletions src/intermediate/Helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -363,3 +363,30 @@ FastSet<const Local*> intermediate::getEquivalenceClass(const Local* local)

return clazz;
}

/*
 * Emits the control-flow skeleton of a while-loop at the given position.
 *
 * Three labels are inserted in this order: the preheader (condition check), the loop content and the block
 * following the loop. On return, the passed walker points to the label of the block following the loop and
 * the returned block is the loop content block.
 */
BasicBlock& intermediate::insertLoop(Method& method, InstructionWalker& it, const Value& conditionValue,
    ConditionCode repeatCondition, const std::string& label)
{
    // the labels of preheader and following block are created from the name of the loop content label
    auto bodyLabel = method.addNewLocal(TYPE_LABEL, label);
    auto checkLabel = method.addNewLocal(TYPE_LABEL, bodyLabel.local()->name, "preheader");
    auto exitLabel = method.addNewLocal(TYPE_LABEL, bodyLabel.local()->name, "after");

    // preheader block: branch into the loop content while the condition matches, otherwise branch behind the loop
    auto checkIt = method.emplaceLabel(it, new BranchLabel(*checkLabel.local()));
    checkIt.nextInBlock();
    checkIt.emplace(new Branch(bodyLabel.local(), repeatCondition, conditionValue));
    checkIt.nextInBlock();
    checkIt.emplace(new Branch(exitLabel.local(), repeatCondition.invert(), conditionValue));
    checkIt.nextInBlock();

    // loop content block: initially only consists of the unconditional branch back to the condition check
    auto bodyIt = method.emplaceLabel(checkIt, new BranchLabel(*bodyLabel.local()));
    bodyIt.nextInBlock();
    bodyIt.emplace(new Branch(checkLabel.local(), COND_ALWAYS, BOOL_TRUE));
    bodyIt.nextInBlock();

    // block following the loop: the caller's walker is moved to its label
    it = method.emplaceLabel(bodyIt, new BranchLabel(*exitLabel.local()));
    return *bodyIt.getBasicBlock();
}
15 changes: 15 additions & 0 deletions src/intermediate/Helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,21 @@ namespace vc4c
* See https://en.wikipedia.org/wiki/Equivalence_class
*/
FastSet<const Local*> getEquivalenceClass(const Local* local);

/**
 * Inserts a tight while-loop into the given method at the given position
 *
 * Besides the loop content block, a preheader block (which checks the condition and branches either into the
 * loop content or behind the loop) and a block following the loop are inserted. The loop content block
 * initially only contains its label and the unconditional branch back to the preheader.
 *
 * The input instruction walker will be set to the first instruction (the label) in the block FOLLOWING the
 * loop. The returned basic block is the loop content block and can be used to insert code into the loop itself.
 *
 * The given label is used as name for the label of the loop content block, the names of the labels of the
 * preheader and of the block following the loop are derived from it.
 *
 * NOTE: The inserted loop will be a while(conditionValue) loop, so the condition variable needs to be
 * initialized before the loop starts.
 *
 * NOTE: The loop is repeated as long as the conditionValue matches the repeatCondition. Normal branch condition
 * behavior applies, so only the first element of the conditionValue is actually checked!
 */
NODISCARD BasicBlock& insertLoop(Method& method, InstructionWalker& it, const Value& conditionValue,
ConditionCode repeatCondition, const std::string& label = "");
} // namespace intermediate
} // namespace vc4c

Expand Down
11 changes: 6 additions & 5 deletions src/intrinsics/Intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,11 +209,12 @@ static IntrinsicFunction intrinsifyDMAAccess(DMAAccess access, bool setMutex)
<< logging::endl);
const DataType type = callSite->assertArgument(0).type.getElementType();
if(!callSite->getArgument(2) || !callSite->assertArgument(2).getLiteralValue())
throw CompilationError(CompilationStep::OPTIMIZER,
"Memory copy with non-constant size is not yet supported", callSite->to_string());
it = method.vpm->insertCopyRAM(method, it, callSite->assertArgument(0), callSite->assertArgument(1),
callSite->assertArgument(2).getLiteralValue()->unsignedInt() * type.getInMemoryWidth(), nullptr,
setMutex);
it = method.vpm->insertCopyRAMDynamic(method, it, callSite->assertArgument(0),
callSite->assertArgument(1), callSite->assertArgument(2), nullptr, setMutex);
else
it = method.vpm->insertCopyRAM(method, it, callSite->assertArgument(0), callSite->assertArgument(1),
callSite->assertArgument(2).getLiteralValue()->unsignedInt() * type.getInMemoryWidth(), nullptr,
setMutex);
break;
}
case DMAAccess::PREFETCH:
Expand Down
22 changes: 13 additions & 9 deletions src/normalization/MemoryMappings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -642,17 +642,21 @@ static InstructionWalker mapMemoryCopy(
else if(srcInRAM && destInRAM)
{
// copy from RAM into RAM -> DMA read + DMA write
if(!numEntries.isLiteralValue())
throw CompilationError(CompilationStep::OPTIMIZER,
"Copying dynamically sized memory within RAM is not yet implemented", mem->to_string());
uint64_t numBytes = numEntries.getLiteralValue()->unsignedInt() *
(mem->getSourceElementType().getScalarBitCount() * mem->getSourceElementType().getVectorWidth()) / 8;
if(numBytes > std::numeric_limits<unsigned>::max())
throw CompilationError(CompilationStep::OPTIMIZER, "Cannot copy more than 4GB of data", mem->to_string());
CPPLOG_LAZY(logging::Level::DEBUG,
log << "Mapping copy from RAM into RAM to DMA read and DMA write: " << mem->to_string() << logging::endl);
it = method.vpm->insertCopyRAM(
method, it, mem->getDestination(), mem->getSource(), static_cast<unsigned>(numBytes), nullptr);
if(!numEntries.isLiteralValue())
it = method.vpm->insertCopyRAMDynamic(method, it, mem->getDestination(), mem->getSource(), numEntries);
else
{
uint64_t numBytes = numEntries.getLiteralValue()->unsignedInt() *
(mem->getSourceElementType().getScalarBitCount() * mem->getSourceElementType().getVectorWidth()) / 8;
if(numBytes > std::numeric_limits<unsigned>::max())
throw CompilationError(
CompilationStep::OPTIMIZER, "Cannot copy more than 4GB of data", mem->to_string());

it = method.vpm->insertCopyRAM(
method, it, mem->getDestination(), mem->getSource(), static_cast<unsigned>(numBytes), nullptr);
}
return it.erase();
}
else if(destInRegister && destInfo.convertedRegisterType)
Expand Down
34 changes: 34 additions & 0 deletions src/periphery/VPM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "VPM.h"

#include "../Profiler.h"
#include "../intermediate/Helper.h"
#include "../intermediate/VectorHelper.h"
#include "../intermediate/operators.h"
#include "log.h"
Expand Down Expand Up @@ -720,6 +721,39 @@ InstructionWalker VPM::insertCopyRAM(Method& method, InstructionWalker it, const
return it;
}

/*
 * Inserts a copy from RAM to RAM for a number of entries which is not known at compile-time.
 *
 * The copy is generated as a loop transferring one element (of the element type of the destination address)
 * per iteration. The mutex (if requested) is held around the whole loop.
 */
InstructionWalker VPM::insertCopyRAMDynamic(Method& method, InstructionWalker it, const Value& destAddress,
    const Value& srcAddress, const Value& numEntries, const VPMArea* area, bool useMutex)
{
    it = insertLockMutex(it, useMutex);

    // the loop runs as long as the number of remaining entries is not zero
    auto remaining = assign(it, numEntries.type, "%remaining_iterations") = numEntries;
    auto& loopBlock = intermediate::insertLoop(method, it, remaining, COND_ZERO_CLEAR, "dynamic_dma_copy");
    {
        // code for a single iteration, inserted behind the label of the loop content block
        auto bodyIt = loopBlock.walk();
        bodyIt.nextInBlock();
        auto entryType = destAddress.type.getElementType();
        // index of the entry to copy, counting upwards from zero
        auto entryIndex = assign(bodyIt, remaining.type) = numEntries - remaining;
        // XXX does not support more than 2^23 elements
        auto byteOffset = assign(bodyIt, remaining.type) =
            mul24(entryIndex, Value(Literal(entryType.getInMemoryWidth()), TYPE_INT32));

        // addresses of the current entry in source and destination memory
        Value currentSource = assign(bodyIt, srcAddress.type, "%mem_copy_addr") = srcAddress + byteOffset;
        Value currentDest = assign(bodyIt, destAddress.type, "%mem_copy_addr") = destAddress + byteOffset;

        // the mutex is already handled outside of the loop
        bodyIt = insertReadRAM(method, bodyIt, currentSource, entryType, area, false);
        bodyIt = insertWriteRAM(method, bodyIt, currentDest, entryType, area, false);

        // one entry less to go
        assign(bodyIt, remaining) = remaining - INT_ONE;
    }

    // skip the label of the block following the loop before releasing the mutex
    it.nextInBlock();
    it = insertUnlockMutex(it, useMutex);
    return it;
}

InstructionWalker VPM::insertFillRAM(Method& method, InstructionWalker it, const Value& memoryAddress, DataType type,
const unsigned numCopies, const VPMArea* area, bool useMutex)
{
Expand Down
4 changes: 4 additions & 0 deletions src/periphery/VPM.h
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,10 @@ namespace vc4c
*/
NODISCARD InstructionWalker insertCopyRAM(Method& method, InstructionWalker it, const Value& destAddress,
const Value& srcAddress, unsigned numBytes, const VPMArea* area = nullptr, bool useMutex = true);

/*
 * Inserts a copy from RAM via DMA into RAM for a number of entries only known at run-time
 *
 * The copy is generated as a loop which transfers a single entry per iteration, where the size of an entry is
 * the in-memory width of the element type of the destination address.
 *
 * NOTE: numEntries is the number of entries to copy, not the number of bytes!
 *
 * NOTE: If useMutex is set, the mutex is locked before the loop and unlocked after it.
 */
NODISCARD InstructionWalker insertCopyRAMDynamic(Method& method, InstructionWalker it,
const Value& destAddress, const Value& srcAddress, const Value& numEntries,
const VPMArea* area = nullptr, bool useMutex = true);
/*
* Inserts a filling of a memory-area with a single value from VPM
*/
Expand Down
6 changes: 3 additions & 3 deletions src/tools/Emulator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1703,9 +1703,9 @@ static void emulateStep(std::vector<qpu_asm::Instruction>::const_iterator firstI
}
catch(const std::exception&)
{
logging::error() << "Emulation threw exception execution in following instruction on QPU " << qpus[i].ID
<< ": " << qpus[i].getCurrentInstruction(firstInstruction)->toHexString(true)
<< logging::endl;
logging::error() << "Emulation threw exception execution in following instruction on QPU "
<< static_cast<unsigned>(qpus[i].ID) << ": "
<< qpus[i].getCurrentInstruction(firstInstruction)->toHexString(true) << logging::endl;
// re-throw error
throw;
}
Expand Down

0 comments on commit 8b672d6

Please sign in to comment.