diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index d8ca3fb8c1544..d548357d1532b 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -481,7 +481,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, } event ResEvent = prepareSYCLEventAssociatedWithQueue(Self); - auto EventImpl = detail::getSyclObjImpl(ResEvent); + const auto &EventImpl = detail::getSyclObjImpl(ResEvent); { NestedCallsTracker tracker; ur_event_handle_t UREvent = nullptr; @@ -489,6 +489,17 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr &Self, EventImpl); EventImpl->setHandle(UREvent); EventImpl->setEnqueued(); + // connect returned event with dependent events + if (!isInOrder()) { + std::vector &ExpandedDepEventImplPtrs = + EventImpl->getPreparedDepsEvents(); + ExpandedDepEventImplPtrs.reserve(ExpandedDepEvents.size()); + for (const event &DepEvent : ExpandedDepEvents) + ExpandedDepEventImplPtrs.push_back( + detail::getSyclObjImpl(DepEvent)); + + EventImpl->cleanDepEventsThroughOneLevel(); + } } if (isInOrder()) { diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index efbbb52acab73..d34dc24851883 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -662,7 +662,7 @@ bool CheckEventReadiness(const ContextImplPtr &Context, } bool Scheduler::areEventsSafeForSchedulerBypass( - const std::vector &DepEvents, ContextImplPtr Context) { + const std::vector &DepEvents, const ContextImplPtr &Context) { return std::all_of( DepEvents.begin(), DepEvents.end(), [&Context](const sycl::event &Event) { @@ -672,7 +672,7 @@ bool Scheduler::areEventsSafeForSchedulerBypass( } bool Scheduler::areEventsSafeForSchedulerBypass( - const std::vector &DepEvents, ContextImplPtr Context) { + const std::vector &DepEvents, const ContextImplPtr &Context) { return std::all_of(DepEvents.begin(), DepEvents.end(), [&Context](const 
EventImplPtr &SyclEventImplPtr) { diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index c6d2d07600d12..f3ce947b32e5d 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -479,10 +479,10 @@ class Scheduler { static bool areEventsSafeForSchedulerBypass(const std::vector &DepEvents, - ContextImplPtr Context); + const ContextImplPtr &Context); static bool areEventsSafeForSchedulerBypass(const std::vector &DepEvents, - ContextImplPtr Context); + const ContextImplPtr &Context); protected: using RWLockT = std::shared_timed_mutex; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 9c410aeb3a2ac..1852eec4d0875 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -468,10 +468,8 @@ event handler::finalize() { if (MQueue && !impl->MGraph && !impl->MSubgraphNode && !MQueue->getCommandGraph() && !impl->CGData.MRequirements.size() && !MStreamStorage.size() && - (!impl->CGData.MEvents.size() || - (MQueue->isInOrder() && - detail::Scheduler::areEventsSafeForSchedulerBypass( - impl->CGData.MEvents, MQueue->getContextImplPtr())))) { + detail::Scheduler::areEventsSafeForSchedulerBypass( + impl->CGData.MEvents, MQueue->getContextImplPtr())) { // if user does not add a new dependency to the dependency graph, i.e. 
// the graph is not changed, then this faster path is used to submit // kernel bypassing scheduler and avoiding CommandGroup, Command objects @@ -546,6 +544,11 @@ event handler::finalize() { if (NewEvent->isHost() || NewEvent->getHandle() == nullptr) NewEvent->setComplete(); NewEvent->setEnqueued(); + // connect returned event with dependent events + if (!MQueue->isInOrder()) { + NewEvent->getPreparedDepsEvents() = impl->CGData.MEvents; + NewEvent->cleanDepEventsThroughOneLevel(); + } MLastEvent = detail::createSyclObjFromImpl(NewEvent); } diff --git a/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp b/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp index ef6142d392e09..74cdc090d9758 100644 --- a/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp +++ b/sycl/test-e2e/XPTI/basic_event_collection_linux.cpp @@ -32,12 +32,6 @@ // CHECK-DAG: from_source : false // CHECK-DAG: kernel_name : typeinfo name for main::{lambda(sycl::_V1::handler&)#1}::operator()(sycl::_V1::handler&) const::{lambda()#1} // CHECK-DAG: sycl_device : {{.*}} -// CHECK: Node create -// CHECK-DAG: queue_id : {{.*}} -// CHECK-DAG: kernel_name : virtual_node[{{.*}}] -// CHECK-NEXT: Edge create -// CHECK-DAG: queue_id : {{.*}} -// CHECK-DAG: event : {{.*}} // CHECK: Task begin // CHECK-DAG: queue_id : {{.*}} // CHECK-DAG: sym_line_no : {{.*}} diff --git a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp index 9366d63838d08..8c5bd97eb2ae6 100644 --- a/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp +++ b/sycl/unittests/scheduler/EnqueueWithDependsOnDeps.cpp @@ -323,6 +323,7 @@ TEST_F(DependsOnTests, ShortcutFunctionWithWaitList) { &redefinedextUSMEnqueueMemcpy); sycl::queue Queue = detail::createSyclObjFromImpl(QueueDevImpl); + // Mock up an incomplete host task auto HostTaskEvent = Queue.submit([&](sycl::handler &cgh) { cgh.host_task([=]() {}); }); std::shared_ptr HostTaskEventImpl = @@ -332,6 +333,7 @@ TEST_F(DependsOnTests, 
ShortcutFunctionWithWaitList) { ASSERT_NE(Cmd, nullptr); Cmd->MIsBlockable = true; Cmd->MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueBlocked; + HostTaskEventImpl->setStateIncomplete(); auto SingleTaskEvent = Queue.submit([&](sycl::handler &cgh) { cgh.depends_on(HostTaskEvent); @@ -341,6 +343,8 @@ TEST_F(DependsOnTests, ShortcutFunctionWithWaitList) { detail::getSyclObjImpl(SingleTaskEvent); EXPECT_EQ(SingleTaskEventImpl->getHandle(), nullptr); + // Mark HostTaskEvent as complete so that SingleTaskEvent can be enqueued + HostTaskEventImpl->setComplete(); Cmd->MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueSuccess; EventsInWaitList.clear(); @@ -375,6 +379,7 @@ TEST_F(DependsOnTests, BarrierWithWaitList) { ASSERT_NE(Cmd, nullptr); Cmd->MIsBlockable = true; Cmd->MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueBlocked; + HostTaskEventImpl->setStateIncomplete(); auto SingleTaskEvent = Queue.submit([&](sycl::handler &cgh) { cgh.depends_on(HostTaskEvent); @@ -384,6 +389,7 @@ TEST_F(DependsOnTests, BarrierWithWaitList) { detail::getSyclObjImpl(SingleTaskEvent); EXPECT_EQ(SingleTaskEventImpl->getHandle(), nullptr); + HostTaskEventImpl->setComplete(); Cmd->MEnqueueStatus = detail::EnqueueResultT::SyclEnqueueSuccess; EventsInWaitList.clear();