From 18283e78be90b3b8259c484e486a2cca2f7ca3fa Mon Sep 17 00:00:00 2001
From: Igor Chorazewicz
Date: Mon, 3 Feb 2025 18:49:11 +0000
Subject: [PATCH] [L0 v2] extend USMFill implementation to support sizes which
 are not powers of 2

zeCommandListAppendMemoryFill only accepts pattern sizes that are powers
of two. For any other pattern size, emulate the fill by appending one
zeCommandListAppendMemoryCopy per patternSize-sized chunk of the
destination. The wait list is passed to the first copy only (it is
cleared afterwards) and only the last copy signals the event.

Add a conformance test case with a non-power-of-two pattern size
({1000, 10}).

---
 .../v2/queue_immediate_in_order.cpp           | 26 ++++++++++++++-----
 test/conformance/enqueue/urEnqueueUSMFill.cpp |  2 ++
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp
index d4f93f2d45..d93e09b184 100644
--- a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp
+++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp
@@ -650,14 +650,28 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericFillUnlocked(
         waitListView.clear();
       }));
 
-  // TODO: support non-power-of-two pattern sizes
-
   // PatternSize must be a power of two for zeCommandListAppendMemoryFill.
   // When it's not, the fill is emulated with zeCommandListAppendMemoryCopy.
-  ZE2UR_CALL(zeCommandListAppendMemoryFill,
-             (commandListManager.getZeCommandList(), pDst, pPattern,
-              patternSize, size, zeSignalEvent, waitListView.num,
-              waitListView.handles));
+  if (isPowerOf2(patternSize)) {
+    ZE2UR_CALL(zeCommandListAppendMemoryFill,
+               (commandListManager.getZeCommandList(), pDst, pPattern,
+                patternSize, size, zeSignalEvent, waitListView.num,
+                waitListView.handles));
+  } else {
+    // Copy the pattern into each patternSize-sized slot of the pDst range.
+    const size_t numOfCopySteps = size / patternSize;
+    const void *src = pPattern;
+
+    for (size_t step = 0; step < numOfCopySteps; ++step) {
+      void *dst = reinterpret_cast<void *>(reinterpret_cast<char *>(pDst) +
+                                           step * patternSize);
+      ZE2UR_CALL(zeCommandListAppendMemoryCopy,
+                 (commandListManager.getZeCommandList(), dst, src, patternSize,
+                  step == numOfCopySteps - 1 ? zeSignalEvent : nullptr,
+                  waitListView.num, waitListView.handles));
+      waitListView.clear();
+    }
+  }
 
   return UR_RESULT_SUCCESS;
 }
diff --git a/test/conformance/enqueue/urEnqueueUSMFill.cpp b/test/conformance/enqueue/urEnqueueUSMFill.cpp
index ea2daa4215..0bd784f0f8 100644
--- a/test/conformance/enqueue/urEnqueueUSMFill.cpp
+++ b/test/conformance/enqueue/urEnqueueUSMFill.cpp
@@ -86,6 +86,8 @@ static std::vector test_cases{
     {256, 256},
     /* pattern_size < size */
     {1024, 256},
+    /* sizes which are not powers of 2 */
+    {1000, 10},
     /* pattern sizes corresponding to some common scalar and vector types */
     {256, 4},
     {256, 8},