diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index 67dcd513e5..29df062069 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -435,7 +435,7 @@ ur_result_t ur_context_handle_t_::finalize() { } { std::scoped_lock Lock(ZeEventPoolCacheMutex); - for (auto &ZePoolCache : ZeEventPoolCache) { + for (auto &ZePoolCache : ZeEventPoolCaches) { for (auto &ZePool : ZePoolCache) { auto ZeResult = ZE_CALL_NOCHECK(zeEventPoolDestroy, (ZePool)); // Gracefully handle the case that L0 was already unloaded. @@ -494,21 +494,21 @@ static const uint32_t MaxNumEventsPerPool = [] { }(); ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool( - ze_event_pool_handle_t &Pool, size_t &Index, bool HostVisible, - bool ProfilingEnabled, ur_device_handle_t Device, - bool CounterBasedEventEnabled, bool UsingImmCmdList, - bool InterruptBasedEventEnabled) { + ze_event_pool_handle_t &Pool, size_t &Index, v2::event_flags_t Flags, + ur_device_handle_t Device) { // Lock while updating event pool machinery. std::scoped_lock Lock(ZeEventPoolCacheMutex); ze_device_handle_t ZeDevice = nullptr; + size_t DeviceId = 0; if (Device) { ZeDevice = Device->ZeDevice; + DeviceId = + Device->Id.has_value() ? 
static_cast(Device->Id.value()) : 0; } - std::list *ZePoolCache = getZeEventPoolCache( - HostVisible, ProfilingEnabled, CounterBasedEventEnabled, UsingImmCmdList, - InterruptBasedEventEnabled, ZeDevice); + std::list *ZePoolCache = + getZeEventPoolCache(Flags, ZeDevice, DeviceId); if (!ZePoolCache->empty()) { if (NumEventsAvailableInEventPool[ZePoolCache->front()] == 0) { @@ -546,14 +546,14 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool( ZeEventPoolDesc.count = MaxNumEventsPerPool; ZeEventPoolDesc.flags = 0; ZeEventPoolDesc.pNext = nullptr; - if (HostVisible) + if (Flags & v2::EVENT_FLAGS_HOST_VISIBLE) ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE; - if (ProfilingEnabled) + if (Flags & v2::EVENT_FLAGS_PROFILING_ENABLED) ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; logger::debug("ze_event_pool_desc_t flags set to: {}", ZeEventPoolDesc.flags); - if (CounterBasedEventEnabled) { - if (UsingImmCmdList) { + if (Flags & v2::EVENT_FLAGS_COUNTER) { + if (Flags & v2::EVENT_FLAGS_IMM_CMDLIST) { counterBasedExt.flags = ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE; } else { counterBasedExt.flags = @@ -561,11 +561,11 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool( } logger::debug("ze_event_pool_desc_t counter based flags set to: {}", counterBasedExt.flags); - if (InterruptBasedEventEnabled) { + if (Flags & v2::EVENT_FLAGS_INTERRUPT) { counterBasedExt.pNext = &eventSyncMode; } ZeEventPoolDesc.pNext = &counterBasedExt; - } else if (InterruptBasedEventEnabled) { + } else if (Flags & v2::EVENT_FLAGS_INTERRUPT) { ZeEventPoolDesc.pNext = &eventSyncMode; } @@ -592,18 +592,23 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool( return UR_RESULT_SUCCESS; } -ur_event_handle_t ur_context_handle_t_::getEventFromContextCache( - bool HostVisible, bool WithProfiling, ur_device_handle_t Device, - bool CounterBasedEventEnabled, bool InterruptBasedEventEnabled) { +ur_event_handle_t 
+ur_context_handle_t_::getEventFromContextCache(v2::event_flags_t Flags, + ur_device_handle_t Device) { std::scoped_lock Lock(EventCacheMutex); - auto Cache = - getEventCache(HostVisible, WithProfiling, Device, - CounterBasedEventEnabled, InterruptBasedEventEnabled); + + auto Cache = getEventCache(Flags & v2::EVENT_FLAGS_HOST_VISIBLE, + Flags & v2::EVENT_FLAGS_PROFILING_ENABLED, Device, + Flags & v2::EVENT_FLAGS_COUNTER, + Flags & v2::EVENT_FLAGS_INTERRUPT); + if (Cache->empty()) { logger::info("Cache empty (Host Visible: {}, Profiling: {}, Counter: {}, " "Interrupt: {}, Device: {})", - HostVisible, WithProfiling, CounterBasedEventEnabled, - InterruptBasedEventEnabled, Device); + (Flags & v2::EVENT_FLAGS_HOST_VISIBLE), + (Flags & v2::EVENT_FLAGS_PROFILING_ENABLED), + (Flags & v2::EVENT_FLAGS_COUNTER), + (Flags & v2::EVENT_FLAGS_INTERRUPT), Device); return nullptr; } @@ -632,7 +637,7 @@ void ur_context_handle_t_::addEventToContextCache(ur_event_handle_t Event) { } auto Cache = getEventCache( - Event->isHostVisible(), Event->isProfilingEnabled(), Device, + Event->isHostVisible(), Event->isProfilingEnabled(), Device, Event->CounterBasedEventsEnabled, Event->InterruptBasedEventsEnabled); logger::info("Inserting {} event (Host Visible: {}, Profiling: {}, Counter: " "{}, Device: {}) into cache {}", @@ -653,17 +658,32 @@ ur_context_handle_t_::decrementUnreleasedEventsInPool(ur_event_handle_t Event) { } ze_device_handle_t ZeDevice = nullptr; + size_t DeviceId = 0; + bool UsingImmediateCommandlists = !Event->UrQueue || Event->UrQueue->UsingImmCmdLists; if (!Event->IsMultiDevice && Event->UrQueue) { ZeDevice = Event->UrQueue->Device->ZeDevice; + DeviceId = Event->UrQueue->Device->Id.has_value() + ? 
static_cast(Event->UrQueue->Device->Id.value()) + : 0; } - - std::list *ZePoolCache = getZeEventPoolCache( - Event->isHostVisible(), Event->isProfilingEnabled(), - Event->CounterBasedEventsEnabled, UsingImmediateCommandlists, - Event->InterruptBasedEventsEnabled, ZeDevice); + v2::event_flags_t Flags = 0; + if (UsingImmediateCommandlists) + Flags |= v2::EVENT_FLAGS_IMM_CMDLIST; + if (Event->isHostVisible()) + Flags |= v2::EVENT_FLAGS_HOST_VISIBLE; + if (Event->isProfilingEnabled()) + Flags |= v2::EVENT_FLAGS_PROFILING_ENABLED; + if (Event->CounterBasedEventsEnabled) + Flags |= v2::EVENT_FLAGS_COUNTER; + if (Event->InterruptBasedEventsEnabled) + Flags |= v2::EVENT_FLAGS_INTERRUPT; + if (Event->IsMultiDevice) + Flags |= v2::EVENT_FLAGS_MULTIDEVICE; + std::list *ZePoolCache = + getZeEventPoolCache(Flags, ZeDevice, DeviceId); // Put the empty pool to the cache of the pools. if (NumEventsUnreleasedInEventPool[Event->ZeEventPool] == 0) diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index 43608e8bfc..b5aecc7bca 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -22,6 +22,7 @@ #include #include +#include "./v2/event_provider.hpp" #include "common.hpp" #include "queue.hpp" @@ -168,9 +169,8 @@ struct ur_context_handle_t_ : _ur_object { // head. // // Cache of event pools to which host-visible events are added to. - std::vector> ZeEventPoolCache{30}; - std::vector> - ZeEventPoolCacheDeviceMap{30}; + using ZeEventPoolCache = std::list; + std::vector ZeEventPoolCaches; // This map will be used to determine if a pool is full or not // by storing number of empty slots available in the pool. @@ -213,124 +213,54 @@ struct ur_context_handle_t_ : _ur_object { // slot for a host-visible event. The ProfilingEnabled tells is we need a // slot for an event with profiling capabilities. 
ur_result_t getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &, size_t &, - bool HostVisible, - bool ProfilingEnabled, - ur_device_handle_t Device, - bool CounterBasedEventEnabled, - bool UsingImmCmdList, - bool InterruptBasedEventEnabled); + v2::event_flags_t Flags, + ur_device_handle_t Device); // Get ur_event_handle_t from cache. - ur_event_handle_t getEventFromContextCache(bool HostVisible, - bool WithProfiling, - ur_device_handle_t Device, - bool CounterBasedEventEnabled, - bool InterruptBasedEventEnabled); + ur_event_handle_t getEventFromContextCache(v2::event_flags_t Flags, + ur_device_handle_t Device); // Add ur_event_handle_t to cache. void addEventToContextCache(ur_event_handle_t); - enum EventPoolCacheType { - HostVisibleCacheType, - HostInvisibleCacheType, - HostVisibleCounterBasedRegularCacheType, - HostInvisibleCounterBasedRegularCacheType, - HostVisibleCounterBasedImmediateCacheType, - HostInvisibleCounterBasedImmediateCacheType, - - HostVisibleInterruptBasedRegularCacheType, - HostInvisibleInterruptBasedRegularCacheType, - HostVisibleInterruptBasedImmediateCacheType, - HostInvisibleInterruptBasedImmediateCacheType, - - HostVisibleInterruptAndCounterBasedRegularCacheType, - HostInvisibleInterruptAndCounterBasedRegularCacheType, - HostVisibleInterruptAndCounterBasedImmediateCacheType, - HostInvisibleInterruptAndCounterBasedImmediateCacheType - }; - std::list * - getZeEventPoolCache(bool HostVisible, bool WithProfiling, - bool CounterBasedEventEnabled, bool UsingImmediateCmdList, - bool InterruptBasedEventEnabled, - ze_device_handle_t ZeDevice) { - EventPoolCacheType CacheType; - - calculateCacheIndex(HostVisible, CounterBasedEventEnabled, - UsingImmediateCmdList, InterruptBasedEventEnabled, - CacheType); + getZeEventPoolCache(v2::event_flags_t Flags, ze_device_handle_t ZeDevice, + size_t DeviceId) { + size_t index = 0; + index |= uint64_t(Flags); if (ZeDevice) { - auto ZeEventPoolCacheMap = - WithProfiling ? 
&ZeEventPoolCacheDeviceMap[CacheType * 2] - : &ZeEventPoolCacheDeviceMap[CacheType * 2 + 1]; - if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) { - ZeEventPoolCache.emplace_back(); - ZeEventPoolCacheMap->insert( - std::make_pair(ZeDevice, ZeEventPoolCache.size() - 1)); - } - return &ZeEventPoolCache[(*ZeEventPoolCacheMap)[ZeDevice]]; - } else { - return WithProfiling ? &ZeEventPoolCache[CacheType * 2] - : &ZeEventPoolCache[CacheType * 2 + 1]; + index |= v2::EVENT_FLAGS_DEVICE | (DeviceId << v2::MAX_EVENT_FLAG_BITS); } + + if (index >= ZeEventPoolCaches.size()) { + ZeEventPoolCaches.resize(index + 1); + } + return &ZeEventPoolCaches[index]; } - ur_result_t calculateCacheIndex(bool HostVisible, - bool CounterBasedEventEnabled, - bool UsingImmediateCmdList, - bool InterruptBasedEventEnabled, - EventPoolCacheType &CacheType) { - if (InterruptBasedEventEnabled) { - if (CounterBasedEventEnabled) { - if (HostVisible) { - if (UsingImmediateCmdList) { - CacheType = HostVisibleInterruptAndCounterBasedImmediateCacheType; - } else { - CacheType = HostVisibleInterruptAndCounterBasedRegularCacheType; - } - } else { - if (UsingImmediateCmdList) { - CacheType = HostInvisibleInterruptAndCounterBasedImmediateCacheType; - } else { - CacheType = HostInvisibleInterruptAndCounterBasedRegularCacheType; - } + /* + std::list * + getZeEventPoolCache(v2::event_flags_t Flags, ze_device_handle_t ZeDevice) { + size_t index = 0; + index |= Flags; + bool WithProfiling = Flags & v2::EVENT_FLAGS_PROFILING_ENABLED; + + if (ZeDevice) { + auto ZeEventPoolCacheMap = + WithProfiling ? 
&ZeEventPoolCachesDeviceMap[index * 2] + : &ZeEventPoolCachesDeviceMap[index * 2 + 1]; + if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) { + ZeEventPoolCaches.emplace_back(); + ZeEventPoolCacheMap->insert( + std::make_pair(ZeDevice, ZeEventPoolCaches.size() - 1)); } + return &ZeEventPoolCaches[(*ZeEventPoolCacheMap)[ZeDevice]]; } else { - if (HostVisible) { - if (UsingImmediateCmdList) { - CacheType = HostVisibleInterruptBasedImmediateCacheType; - } else { - CacheType = HostVisibleInterruptBasedRegularCacheType; - } - } else { - if (UsingImmediateCmdList) { - CacheType = HostInvisibleInterruptBasedImmediateCacheType; - } else { - CacheType = HostInvisibleInterruptBasedRegularCacheType; - } - } - } - } else { - if (CounterBasedEventEnabled && HostVisible && !UsingImmediateCmdList) { - CacheType = HostVisibleCounterBasedRegularCacheType; - } else if (CounterBasedEventEnabled && !HostVisible && - !UsingImmediateCmdList) { - CacheType = HostInvisibleCounterBasedRegularCacheType; - } else if (CounterBasedEventEnabled && HostVisible && - UsingImmediateCmdList) { - CacheType = HostVisibleCounterBasedImmediateCacheType; - } else if (CounterBasedEventEnabled && !HostVisible && - UsingImmediateCmdList) { - CacheType = HostInvisibleCounterBasedImmediateCacheType; - } else if (!CounterBasedEventEnabled && HostVisible) { - CacheType = HostVisibleCacheType; - } else { - CacheType = HostInvisibleCacheType; + return WithProfiling ? &ZeEventPoolCaches[index * 2] + : &ZeEventPoolCaches[index * 2 + 1]; } } - - return UR_RESULT_SUCCESS; - } + */ // Decrement number of events living in the pool upon event destroy // and return the pool to the cache if there are no unreleased events. @@ -379,7 +309,6 @@ struct ur_context_handle_t_ : _ur_object { MAX_EVENT_FLAG_BITS = 5, // this is used as an offset for embedding device id }; - // Mutex to control operations on event caches. 
ur_mutex EventCacheMutex; @@ -412,7 +341,6 @@ struct ur_context_handle_t_ : _ur_object { if (index >= EventCaches.size()) { EventCaches.resize(index + 1); } - return &EventCaches[index]; } }; diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index e12352b6b1..04ff0b3dc8 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -1341,6 +1341,19 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, bool ProfilingEnabled = ForceDisableProfiling ? false : (!Queue || Queue->isProfilingEnabled()); bool UsingImmediateCommandlists = !Queue || Queue->UsingImmCmdLists; + v2::event_flags_t Flags = 0; + if (ProfilingEnabled) + Flags |= v2::EVENT_FLAGS_PROFILING_ENABLED; + if (UsingImmediateCommandlists) + Flags |= v2::EVENT_FLAGS_IMM_CMDLIST; + if (HostVisible) + Flags |= v2::EVENT_FLAGS_HOST_VISIBLE; + if (IsMultiDevice) + Flags |= v2::EVENT_FLAGS_MULTIDEVICE; + if (CounterBasedEventEnabled) + Flags |= v2::EVENT_FLAGS_COUNTER; + if (InterruptBasedEventEnabled) + Flags |= v2::EVENT_FLAGS_INTERRUPT; ur_device_handle_t Device = nullptr; @@ -1348,9 +1361,7 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, Device = Queue->Device; } - if (auto CachedEvent = Context->getEventFromContextCache( - HostVisible, ProfilingEnabled, Device, CounterBasedEventEnabled, - InterruptBasedEventEnabled)) { + if (auto CachedEvent = Context->getEventFromContextCache(Flags, Device)) { *RetEvent = CachedEvent; return UR_RESULT_SUCCESS; } @@ -1360,10 +1371,8 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, size_t Index = 0; - if (auto Res = Context->getFreeSlotInExistingOrNewPool( - ZeEventPool, Index, HostVisible, ProfilingEnabled, Device, - CounterBasedEventEnabled, UsingImmediateCommandlists, - InterruptBasedEventEnabled)) + if (auto Res = Context->getFreeSlotInExistingOrNewPool(ZeEventPool, Index, + Flags, Device)) return Res; ZeStruct 
ZeEventDesc; @@ -1400,6 +1409,7 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, if (HostVisible) (*RetEvent)->HostVisibleEvent = reinterpret_cast(*RetEvent); + (*RetEvent)->Flags = Flags; return UR_RESULT_SUCCESS; } diff --git a/source/adapters/level_zero/event.hpp b/source/adapters/level_zero/event.hpp index efae32f361..f95fa66c67 100644 --- a/source/adapters/level_zero/event.hpp +++ b/source/adapters/level_zero/event.hpp @@ -24,6 +24,7 @@ #include #include +#include "./v2/event_provider.hpp" #include "common.hpp" #include "queue.hpp" @@ -139,6 +140,8 @@ struct ur_event_handle_t_ : _ur_object { // Level Zero event pool handle. ze_event_pool_handle_t ZeEventPool; + v2::event_flags_t Flags = 0; + // In case we use device-only events this holds their host-visible // counterpart. If this event is itself host-visble then HostVisibleEvent // points to this event. If this event is not host-visible then this field can diff --git a/source/adapters/level_zero/v2/event_provider.hpp b/source/adapters/level_zero/v2/event_provider.hpp index c6bedb8fc1..024a1be15b 100644 --- a/source/adapters/level_zero/v2/event_provider.hpp +++ b/source/adapters/level_zero/v2/event_provider.hpp @@ -23,10 +23,16 @@ namespace v2 { using event_flags_t = uint32_t; enum event_flag_t { - EVENT_FLAGS_COUNTER = UR_BIT(0), + EVENT_FLAGS_HOST_VISIBLE = UR_BIT(0), EVENT_FLAGS_PROFILING_ENABLED = UR_BIT(1), + EVENT_FLAGS_COUNTER = UR_BIT(2), + EVENT_FLAGS_INTERRUPT = UR_BIT(3), + EVENT_FLAGS_IMM_CMDLIST = UR_BIT(4), + EVENT_FLAGS_MULTIDEVICE = UR_BIT(6), + EVENT_FLAGS_DEVICE = UR_BIT(7), // if set, subsequent bits are device id + MAX_EVENT_FLAG_BITS = 8, }; -static constexpr size_t EVENT_FLAGS_USED_BITS = 2; +static constexpr size_t EVENT_FLAGS_USED_BITS = 9; class event_provider;