Skip to content

Commit

Permalink
[L0 v2] extend USMFill implementation to support sizes which are not powers of 2
Browse files Browse the repository at this point in the history
  • Loading branch information
igchor committed Feb 4, 2025
1 parent ee701e0 commit 18283e7
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 6 deletions.
26 changes: 20 additions & 6 deletions source/adapters/level_zero/v2/queue_immediate_in_order.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -650,14 +650,28 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericFillUnlocked(
waitListView.clear();
}));

// TODO: support non-power-of-two pattern sizes

// PatternSize must be a power of two for zeCommandListAppendMemoryFill.
// When it's not, the fill is emulated with zeCommandListAppendMemoryCopy.
ZE2UR_CALL(zeCommandListAppendMemoryFill,
(commandListManager.getZeCommandList(), pDst, pPattern,
patternSize, size, zeSignalEvent, waitListView.num,
waitListView.handles));
if (isPowerOf2(patternSize)) {
ZE2UR_CALL(zeCommandListAppendMemoryFill,
(commandListManager.getZeCommandList(), pDst, pPattern,
patternSize, size, zeSignalEvent, waitListView.num,
waitListView.handles));
} else {
// Copy the pattern into every patternSize-sized entry of the memory pointed to by pDst.
uint32_t numOfCopySteps = size / patternSize;
const void *src = pPattern;

for (uint32_t step = 0; step < numOfCopySteps; ++step) {
void *dst = reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(pDst) +
step * patternSize);
ZE2UR_CALL(zeCommandListAppendMemoryCopy,
(commandListManager.getZeCommandList(), dst, src, patternSize,
step == numOfCopySteps - 1 ? zeSignalEvent : nullptr,
waitListView.num, waitListView.handles));
waitListView.clear();
}
}

return UR_RESULT_SUCCESS;
}
Expand Down
2 changes: 2 additions & 0 deletions test/conformance/enqueue/urEnqueueUSMFill.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ static std::vector<testParametersFill> test_cases{
{256, 256},
/* pattern_size < size */
{1024, 256},
/* sizes which are not powers of 2 */
{1000, 10},
/* pattern sizes corresponding to some common scalar and vector types */
{256, 4},
{256, 8},
Expand Down

0 comments on commit 18283e7

Please sign in to comment.