Skip to content

Commit

Permalink
Merge pull request #1600 from winstonzhang-intel/event-bool-refactoring
Browse files Browse the repository at this point in the history
[L0] Refactoring of boolean event parameters
  • Loading branch information
kbenzie authored Feb 11, 2025
2 parents f66751d + f1c14d4 commit b05ffb5
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 145 deletions.
74 changes: 46 additions & 28 deletions source/adapters/level_zero/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ ur_result_t ur_context_handle_t_::finalize() {
}
{
std::scoped_lock<ur_mutex> Lock(ZeEventPoolCacheMutex);
for (auto &ZePoolCache : ZeEventPoolCache) {
for (auto &ZePoolCache : ZeEventPoolCaches) {
for (auto &ZePool : ZePoolCache) {
auto ZeResult = ZE_CALL_NOCHECK(zeEventPoolDestroy, (ZePool));
// Gracefully handle the case that L0 was already unloaded.
Expand Down Expand Up @@ -494,21 +494,21 @@ static const uint32_t MaxNumEventsPerPool = [] {
}();

ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
ze_event_pool_handle_t &Pool, size_t &Index, bool HostVisible,
bool ProfilingEnabled, ur_device_handle_t Device,
bool CounterBasedEventEnabled, bool UsingImmCmdList,
bool InterruptBasedEventEnabled) {
ze_event_pool_handle_t &Pool, size_t &Index, v2::event_flags_t Flags,
ur_device_handle_t Device) {
// Lock while updating event pool machinery.
std::scoped_lock<ur_mutex> Lock(ZeEventPoolCacheMutex);

ze_device_handle_t ZeDevice = nullptr;
size_t DeviceId;

if (Device) {
ZeDevice = Device->ZeDevice;
DeviceId =
Device->Id.has_value() ? static_cast<size_t>(Device->Id.value()) : 0;
}
std::list<ze_event_pool_handle_t> *ZePoolCache = getZeEventPoolCache(
HostVisible, ProfilingEnabled, CounterBasedEventEnabled, UsingImmCmdList,
InterruptBasedEventEnabled, ZeDevice);
std::list<ze_event_pool_handle_t> *ZePoolCache =
getZeEventPoolCache(Flags, ZeDevice, DeviceId);

if (!ZePoolCache->empty()) {
if (NumEventsAvailableInEventPool[ZePoolCache->front()] == 0) {
Expand Down Expand Up @@ -546,26 +546,26 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
ZeEventPoolDesc.count = MaxNumEventsPerPool;
ZeEventPoolDesc.flags = 0;
ZeEventPoolDesc.pNext = nullptr;
if (HostVisible)
if (Flags & v2::EVENT_FLAGS_HOST_VISIBLE)
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
if (ProfilingEnabled)
if (Flags & v2::EVENT_FLAGS_PROFILING_ENABLED)
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
logger::debug("ze_event_pool_desc_t flags set to: {}",
ZeEventPoolDesc.flags);
if (CounterBasedEventEnabled) {
if (UsingImmCmdList) {
if (Flags & v2::EVENT_FLAGS_COUNTER) {
if (Flags & v2::EVENT_FLAGS_IMM_CMDLIST) {
counterBasedExt.flags = ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE;
} else {
counterBasedExt.flags =
ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_NON_IMMEDIATE;
}
logger::debug("ze_event_pool_desc_t counter based flags set to: {}",
counterBasedExt.flags);
if (InterruptBasedEventEnabled) {
if (Flags & EVENT_FLAG_INTERRUPT) {
counterBasedExt.pNext = &eventSyncMode;
}
ZeEventPoolDesc.pNext = &counterBasedExt;
} else if (InterruptBasedEventEnabled) {
} else if (Flags & EVENT_FLAG_INTERRUPT) {
ZeEventPoolDesc.pNext = &eventSyncMode;
}

Expand All @@ -592,18 +592,23 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
return UR_RESULT_SUCCESS;
}

ur_event_handle_t ur_context_handle_t_::getEventFromContextCache(
bool HostVisible, bool WithProfiling, ur_device_handle_t Device,
bool CounterBasedEventEnabled, bool InterruptBasedEventEnabled) {
ur_event_handle_t
ur_context_handle_t_::getEventFromContextCache(v2::event_flags_t Flags,
ur_device_handle_t Device) {
std::scoped_lock<ur_mutex> Lock(EventCacheMutex);
auto Cache =
getEventCache(HostVisible, WithProfiling, Device,
CounterBasedEventEnabled, InterruptBasedEventEnabled);

auto Cache = getEventCache(Flags & v2::EVENT_FLAGS_HOST_VISIBLE,
Flags & v2::EVENT_FLAGS_PROFILING_ENABLED, Device,
Flags & v2::EVENT_FLAGS_COUNTER,
Flags & v2::EVENT_FLAGS_INTERRUPT);

if (Cache->empty()) {
logger::info("Cache empty (Host Visible: {}, Profiling: {}, Counter: {}, "
"Interrupt: {}, Device: {})",
HostVisible, WithProfiling, CounterBasedEventEnabled,
InterruptBasedEventEnabled, Device);
(Flags & v2::EVENT_FLAGS_HOST_VISIBLE),
(Flags & v2::EVENT_FLAGS_PROFILING_ENABLED),
(Flags & v2::EVENT_FLAGS_COUNTER),
(Flags & v2::EVENT_FLAGS_INTERRUPT), Device);
return nullptr;
}

Expand Down Expand Up @@ -632,7 +637,7 @@ void ur_context_handle_t_::addEventToContextCache(ur_event_handle_t Event) {
}

auto Cache = getEventCache(
Event->isHostVisible(), Event->isProfilingEnabled(), Device,
Event->HostVisibleEvent, Event->isProfilingEnabled(), Device,
Event->CounterBasedEventsEnabled, Event->InterruptBasedEventsEnabled);
logger::info("Inserting {} event (Host Visible: {}, Profiling: {}, Counter: "
"{}, Device: {}) into cache {}",
Expand All @@ -653,17 +658,30 @@ ur_context_handle_t_::decrementUnreleasedEventsInPool(ur_event_handle_t Event) {
}

ze_device_handle_t ZeDevice = nullptr;
size_t DeviceId;

bool UsingImmediateCommandlists =
!Event->UrQueue || Event->UrQueue->UsingImmCmdLists;

if (!Event->IsMultiDevice && Event->UrQueue) {
ZeDevice = Event->UrQueue->Device->ZeDevice;
DeviceId = Event->UrQueue->Device->Id.has_value()
? static_cast<size_t>(Event->UrQueue->Device->Id.value())
: 0;
}

std::list<ze_event_pool_handle_t> *ZePoolCache = getZeEventPoolCache(
Event->isHostVisible(), Event->isProfilingEnabled(),
Event->CounterBasedEventsEnabled, UsingImmediateCommandlists,
Event->InterruptBasedEventsEnabled, ZeDevice);
v2::event_flags_t Flags = 0;
if (UsingImmediateCommandlists)
Flags |= v2::EVENT_FLAGS_IMM_CMDLIST;
if (Event->isHostVisible())
Flags |= v2::EVENT_FLAGS_HOST_VISIBLE;
if (Event->isProfilingEnabled())
Flags |= v2::EVENT_FLAGS_PROFILING_ENABLED;
if (Event->CounterBasedEventsEnabled)
Flags |= v2::EVENT_FLAGS_COUNTER;
if (Event->InterruptBasedEventsEnabled)
Flags |= v2::EVENT_FLAGS_INTERRUPT;
std::list<ze_event_pool_handle_t> *ZePoolCache =
getZeEventPoolCache(Flags, ZeDevice, DeviceId);

// Put the empty pool into the cache of pools.
if (NumEventsUnreleasedInEventPool[Event->ZeEventPool] == 0)
Expand Down
144 changes: 36 additions & 108 deletions source/adapters/level_zero/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <ze_api.h>
#include <zes_api.h>

#include "./v2/event_provider.hpp"
#include "common.hpp"
#include "queue.hpp"

Expand Down Expand Up @@ -168,9 +169,8 @@ struct ur_context_handle_t_ : _ur_object {
// head.
//
// Cache of event pools to which host-visible events are added.
std::vector<std::list<ze_event_pool_handle_t>> ZeEventPoolCache{30};
std::vector<std::unordered_map<ze_device_handle_t, size_t>>
ZeEventPoolCacheDeviceMap{30};
using ZeEventPoolCache = std::list<ze_event_pool_handle_t>;
std::vector<ZeEventPoolCache> ZeEventPoolCaches;

// This map will be used to determine if a pool is full or not
// by storing number of empty slots available in the pool.
Expand Down Expand Up @@ -213,124 +213,54 @@ struct ur_context_handle_t_ : _ur_object {
// slot for a host-visible event. The ProfilingEnabled tells us whether we need a
// slot for an event with profiling capabilities.
ur_result_t getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &, size_t &,
bool HostVisible,
bool ProfilingEnabled,
ur_device_handle_t Device,
bool CounterBasedEventEnabled,
bool UsingImmCmdList,
bool InterruptBasedEventEnabled);
v2::event_flags_t Flags,
ur_device_handle_t Device);

// Get ur_event_handle_t from cache.
ur_event_handle_t getEventFromContextCache(bool HostVisible,
bool WithProfiling,
ur_device_handle_t Device,
bool CounterBasedEventEnabled,
bool InterruptBasedEventEnabled);
ur_event_handle_t getEventFromContextCache(v2::event_flags_t Flags,
ur_device_handle_t Device);

// Add ur_event_handle_t to cache.
void addEventToContextCache(ur_event_handle_t);

enum EventPoolCacheType {
HostVisibleCacheType,
HostInvisibleCacheType,
HostVisibleCounterBasedRegularCacheType,
HostInvisibleCounterBasedRegularCacheType,
HostVisibleCounterBasedImmediateCacheType,
HostInvisibleCounterBasedImmediateCacheType,

HostVisibleInterruptBasedRegularCacheType,
HostInvisibleInterruptBasedRegularCacheType,
HostVisibleInterruptBasedImmediateCacheType,
HostInvisibleInterruptBasedImmediateCacheType,

HostVisibleInterruptAndCounterBasedRegularCacheType,
HostInvisibleInterruptAndCounterBasedRegularCacheType,
HostVisibleInterruptAndCounterBasedImmediateCacheType,
HostInvisibleInterruptAndCounterBasedImmediateCacheType
};

std::list<ze_event_pool_handle_t> *
getZeEventPoolCache(bool HostVisible, bool WithProfiling,
bool CounterBasedEventEnabled, bool UsingImmediateCmdList,
bool InterruptBasedEventEnabled,
ze_device_handle_t ZeDevice) {
EventPoolCacheType CacheType;

calculateCacheIndex(HostVisible, CounterBasedEventEnabled,
UsingImmediateCmdList, InterruptBasedEventEnabled,
CacheType);
getZeEventPoolCache(v2::event_flags_t Flags, ze_device_handle_t ZeDevice,
size_t DeviceId) {
size_t index = 0;
index |= uint64_t(Flags);
if (ZeDevice) {
auto ZeEventPoolCacheMap =
WithProfiling ? &ZeEventPoolCacheDeviceMap[CacheType * 2]
: &ZeEventPoolCacheDeviceMap[CacheType * 2 + 1];
if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) {
ZeEventPoolCache.emplace_back();
ZeEventPoolCacheMap->insert(
std::make_pair(ZeDevice, ZeEventPoolCache.size() - 1));
}
return &ZeEventPoolCache[(*ZeEventPoolCacheMap)[ZeDevice]];
} else {
return WithProfiling ? &ZeEventPoolCache[CacheType * 2]
: &ZeEventPoolCache[CacheType * 2 + 1];
index |= v2::EVENT_FLAGS_DEVICE | (DeviceId << v2::MAX_EVENT_FLAG_BITS);
}

if (index >= ZeEventPoolCaches.size()) {
ZeEventPoolCaches.resize(index + 1);
}
return &ZeEventPoolCaches[index];
}

ur_result_t calculateCacheIndex(bool HostVisible,
bool CounterBasedEventEnabled,
bool UsingImmediateCmdList,
bool InterruptBasedEventEnabled,
EventPoolCacheType &CacheType) {
if (InterruptBasedEventEnabled) {
if (CounterBasedEventEnabled) {
if (HostVisible) {
if (UsingImmediateCmdList) {
CacheType = HostVisibleInterruptAndCounterBasedImmediateCacheType;
} else {
CacheType = HostVisibleInterruptAndCounterBasedRegularCacheType;
}
} else {
if (UsingImmediateCmdList) {
CacheType = HostInvisibleInterruptAndCounterBasedImmediateCacheType;
} else {
CacheType = HostInvisibleInterruptAndCounterBasedRegularCacheType;
}
/*
std::list<ze_event_pool_handle_t> *
getZeEventPoolCache(v2::event_flags_t Flags, ze_device_handle_t ZeDevice) {
size_t index = 0;
index |= Flags;
bool WithProfiling = Flags & v2::EVENT_FLAGS_PROFILING_ENABLED;
if (ZeDevice) {
auto ZeEventPoolCacheMap =
WithProfiling ? &ZeEventPoolCachesDeviceMap[index * 2]
: &ZeEventPoolCachesDeviceMap[index * 2 + 1];
if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) {
ZeEventPoolCaches.emplace_back();
ZeEventPoolCacheMap->insert(
std::make_pair(ZeDevice, ZeEventPoolCaches.size() - 1));
}
return &ZeEventPoolCaches[(*ZeEventPoolCacheMap)[ZeDevice]];
} else {
if (HostVisible) {
if (UsingImmediateCmdList) {
CacheType = HostVisibleInterruptBasedImmediateCacheType;
} else {
CacheType = HostVisibleInterruptBasedRegularCacheType;
}
} else {
if (UsingImmediateCmdList) {
CacheType = HostInvisibleInterruptBasedImmediateCacheType;
} else {
CacheType = HostInvisibleInterruptBasedRegularCacheType;
}
}
}
} else {
if (CounterBasedEventEnabled && HostVisible && !UsingImmediateCmdList) {
CacheType = HostVisibleCounterBasedRegularCacheType;
} else if (CounterBasedEventEnabled && !HostVisible &&
!UsingImmediateCmdList) {
CacheType = HostInvisibleCounterBasedRegularCacheType;
} else if (CounterBasedEventEnabled && HostVisible &&
UsingImmediateCmdList) {
CacheType = HostVisibleCounterBasedImmediateCacheType;
} else if (CounterBasedEventEnabled && !HostVisible &&
UsingImmediateCmdList) {
CacheType = HostInvisibleCounterBasedImmediateCacheType;
} else if (!CounterBasedEventEnabled && HostVisible) {
CacheType = HostVisibleCacheType;
} else {
CacheType = HostInvisibleCacheType;
return WithProfiling ? &ZeEventPoolCaches[index * 2]
: &ZeEventPoolCaches[index * 2 + 1];
}
}

return UR_RESULT_SUCCESS;
}
*/

// Decrement number of events living in the pool upon event destroy
// and return the pool to the cache if there are no unreleased events.
Expand Down Expand Up @@ -379,7 +309,6 @@ struct ur_context_handle_t_ : _ur_object {
MAX_EVENT_FLAG_BITS =
5, // this is used as an offset for embedding device id
};

// Mutex to control operations on event caches.
ur_mutex EventCacheMutex;

Expand Down Expand Up @@ -412,7 +341,6 @@ struct ur_context_handle_t_ : _ur_object {
if (index >= EventCaches.size()) {
EventCaches.resize(index + 1);
}

return &EventCaches[index];
}
};
Expand Down
24 changes: 17 additions & 7 deletions source/adapters/level_zero/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1341,16 +1341,27 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
bool ProfilingEnabled =
ForceDisableProfiling ? false : (!Queue || Queue->isProfilingEnabled());
bool UsingImmediateCommandlists = !Queue || Queue->UsingImmCmdLists;
v2::event_flags_t Flags = 0;
if (ProfilingEnabled)
Flags |= v2::EVENT_FLAGS_PROFILING_ENABLED;
if (UsingImmediateCommandlists)
Flags |= v2::EVENT_FLAGS_IMM_CMDLIST;
if (HostVisible)
Flags |= v2::EVENT_FLAGS_HOST_VISIBLE;
if (IsMultiDevice)
Flags |= v2::EVENT_FLAGS_MULTIDEVICE;
if (CounterBasedEventEnabled)
Flags |= v2::EVENT_FLAGS_COUNTER;
if (InterruptBasedEventEnabled)
Flags |= v2::EVENT_FLAGS_INTERRUPT;

ur_device_handle_t Device = nullptr;

if (!IsMultiDevice && Queue) {
Device = Queue->Device;
}

if (auto CachedEvent = Context->getEventFromContextCache(
HostVisible, ProfilingEnabled, Device, CounterBasedEventEnabled,
InterruptBasedEventEnabled)) {
if (auto CachedEvent = Context->getEventFromContextCache(Flags, Device)) {
*RetEvent = CachedEvent;
return UR_RESULT_SUCCESS;
}
Expand All @@ -1360,10 +1371,8 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,

size_t Index = 0;

if (auto Res = Context->getFreeSlotInExistingOrNewPool(
ZeEventPool, Index, HostVisible, ProfilingEnabled, Device,
CounterBasedEventEnabled, UsingImmediateCommandlists,
InterruptBasedEventEnabled))
if (auto Res = Context->getFreeSlotInExistingOrNewPool(ZeEventPool, Index,
Flags, Device))
return Res;

ZeStruct<ze_event_desc_t> ZeEventDesc;
Expand Down Expand Up @@ -1400,6 +1409,7 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
if (HostVisible)
(*RetEvent)->HostVisibleEvent =
reinterpret_cast<ur_event_handle_t>(*RetEvent);
(*RetEvent)->Flags = Flags;

return UR_RESULT_SUCCESS;
}
Expand Down
Loading

0 comments on commit b05ffb5

Please sign in to comment.