From cb884a59573cf7513752aa3936a62a8730a93af2 Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Fri, 18 Jul 2025 16:01:21 +0100 Subject: [PATCH 1/3] [UR][Offload] Improvements to offload event handling * Implemented device infos for event handles. * Fixed event being deleted before it should have been. * Make the tests consider profiling info optional. --- .../source/adapters/offload/enqueue.cpp | 46 ++++++++++--------- .../source/adapters/offload/event.cpp | 9 +++- .../source/adapters/offload/event.hpp | 7 ++- .../source/adapters/offload/queue.cpp | 3 +- .../source/adapters/offload/queue.hpp | 1 + .../event/urEventGetProfilingInfo.cpp | 12 +++-- 6 files changed, 48 insertions(+), 30 deletions(-) diff --git a/unified-runtime/source/adapters/offload/enqueue.cpp b/unified-runtime/source/adapters/offload/enqueue.cpp index 24cf8a84b6cb..d35d2bd40e5e 100644 --- a/unified-runtime/source/adapters/offload/enqueue.cpp +++ b/unified-runtime/source/adapters/offload/enqueue.cpp @@ -74,7 +74,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( hKernel->Args.getStorageSize(), &LaunchArgs, &EventOut)); if (phEvent) { - auto *Event = new ur_event_handle_t_(); + auto *Event = new ur_event_handle_t_(UR_COMMAND_KERNEL_LAUNCH, hQueue); Event->OffloadEvent = EventOut; *phEvent = Event; } @@ -94,10 +94,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( } namespace { -ur_result_t doMemcpy(ur_queue_handle_t hQueue, void *DestPtr, - ol_device_handle_t DestDevice, const void *SrcPtr, - ol_device_handle_t SrcDevice, size_t size, bool blocking, - uint32_t numEventsInWaitList, +ur_result_t doMemcpy(ur_command_t Command, ur_queue_handle_t hQueue, + void *DestPtr, ol_device_handle_t DestDevice, + const void *SrcPtr, ol_device_handle_t SrcDevice, + size_t size, bool blocking, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { // Ignore wait list for now @@ -115,7 +115,7 @@ ur_result_t doMemcpy(ur_queue_handle_t hQueue, void *DestPtr, } if (phEvent) { - auto *Event = new ur_event_handle_t_(); + auto *Event = new ur_event_handle_t_(Command, hQueue); Event->OffloadEvent = EventOut; *phEvent = Event; } @@ -131,8 +131,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( char *DevPtr = reinterpret_cast(std::get(hBuffer->Mem).Ptr); - return doMemcpy(hQueue, pDst, Adapter->HostDevice, DevPtr + offset, - hQueue->OffloadDevice, size, blockingRead, + return doMemcpy(UR_COMMAND_MEM_BUFFER_READ, hQueue, pDst, Adapter->HostDevice, + DevPtr + offset, hQueue->OffloadDevice, size, blockingRead, numEventsInWaitList, phEventWaitList, phEvent); } @@ -143,9 +143,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( char *DevPtr = reinterpret_cast(std::get(hBuffer->Mem).Ptr); - return doMemcpy(hQueue, DevPtr + offset, hQueue->OffloadDevice, pSrc, - Adapter->HostDevice, size, blockingWrite, numEventsInWaitList, - phEventWaitList, phEvent); + return doMemcpy(UR_COMMAND_MEM_BUFFER_WRITE, hQueue, DevPtr + offset, + hQueue->OffloadDevice, pSrc, Adapter->HostDevice, size, + blockingWrite, numEventsInWaitList, phEventWaitList, phEvent); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( @@ -159,10 +159,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( return Err; } - return doMemcpy(hQueue, pDst, Adapter->HostDevice, - reinterpret_cast(Ptr) + offset, - hQueue->OffloadDevice, count, blockingRead, - numEventsInWaitList, phEventWaitList, phEvent); + return doMemcpy( + UR_COMMAND_DEVICE_GLOBAL_VARIABLE_READ, hQueue, pDst, Adapter->HostDevice, + reinterpret_cast(Ptr) + offset, hQueue->OffloadDevice, + count, blockingRead, numEventsInWaitList, phEventWaitList, phEvent); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( @@ -176,18 +176,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( return Err; } - return doMemcpy(hQueue, reinterpret_cast(Ptr) + offset, - hQueue->OffloadDevice, pSrc, Adapter->HostDevice, count, - blockingWrite, numEventsInWaitList, phEventWaitList, phEvent); + return doMemcpy(UR_COMMAND_DEVICE_GLOBAL_VARIABLE_WRITE, hQueue, + reinterpret_cast(Ptr) + offset, hQueue->OffloadDevice, + pSrc, Adapter->HostDevice, count, blockingWrite, + numEventsInWaitList, phEventWaitList, phEvent); } -ur_result_t enqueueNoOp(ur_queue_handle_t hQueue, ur_event_handle_t *phEvent) { +ur_result_t enqueueNoOp(ur_command_t Type, ur_queue_handle_t hQueue, + ur_event_handle_t *phEvent) { // This path is a no-op, but we can't output a real event because // Offload doesn't currently support creating arbitrary events, and we // don't know the last real event in the queue. Instead we just have to // wait on the whole queue and then return an empty (implicitly // finished) event. - *phEvent = ur_event_handle_t_::createEmptyEvent(); + *phEvent = ur_event_handle_t_::createEmptyEvent(Type, hQueue); return urQueueFinish(hQueue); } @@ -221,7 +223,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( } if (phEvent) { - enqueueNoOp(hQueue, phEvent); + enqueueNoOp(UR_COMMAND_MEM_BUFFER_MAP, hQueue, phEvent); } } *ppRetMap = MapPtr; @@ -255,7 +257,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( } if (phEvent) { - enqueueNoOp(hQueue, phEvent); + enqueueNoOp(UR_COMMAND_MEM_UNMAP, hQueue, phEvent); } } BufferImpl.unmap(pMappedPtr); diff --git a/unified-runtime/source/adapters/offload/event.cpp b/unified-runtime/source/adapters/offload/event.cpp index 47bcb0745f4b..aab41ed3d2d0 100644 --- a/unified-runtime/source/adapters/offload/event.cpp +++ b/unified-runtime/source/adapters/offload/event.cpp @@ -12,6 +12,7 @@ #include #include "event.hpp" +#include "queue.hpp" #include "ur2offload.hpp" UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, @@ -22,6 +23,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { + case UR_EVENT_INFO_CONTEXT: + return ReturnValue(hEvent->UrQueue->UrContext); + case UR_EVENT_INFO_COMMAND_QUEUE: + return ReturnValue(hEvent->UrQueue); + case UR_EVENT_INFO_COMMAND_TYPE: + return ReturnValue(hEvent->Type); case UR_EVENT_INFO_REFERENCE_COUNT: return ReturnValue(hEvent->RefCount.load()); default: @@ -61,9 +68,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventRelease(ur_event_handle_t hEvent) { if (Res) { return offloadResultToUR(Res); } + delete hEvent; } - delete hEvent; return UR_RESULT_SUCCESS; } diff --git a/unified-runtime/source/adapters/offload/event.hpp b/unified-runtime/source/adapters/offload/event.hpp index 642c61e532d8..84f7b02b647c 100644 --- a/unified-runtime/source/adapters/offload/event.hpp +++ b/unified-runtime/source/adapters/offload/event.hpp @@ -18,9 +18,12 @@ struct ur_event_handle_t_ : RefCounted { ol_event_handle_t OffloadEvent; ur_command_t Type; + ur_queue_handle_t UrQueue; - static ur_event_handle_t createEmptyEvent() { - auto *Event = new ur_event_handle_t_(); + ur_event_handle_t_(ur_command_t Type, ur_queue_handle_t Queue) : Type(Type), UrQueue(Queue) {} + + static ur_event_handle_t createEmptyEvent(ur_command_t Type, ur_queue_handle_t Queue) { + auto *Event = new ur_event_handle_t_(Type, Queue); // Null event represents an empty event. Waiting on it is a no-op. Event->OffloadEvent = nullptr; diff --git a/unified-runtime/source/adapters/offload/queue.cpp b/unified-runtime/source/adapters/offload/queue.cpp index 6d4beea511d1..d821b46b5774 100644 --- a/unified-runtime/source/adapters/offload/queue.cpp +++ b/unified-runtime/source/adapters/offload/queue.cpp @@ -18,7 +18,7 @@ #include "ur2offload.hpp" UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( - [[maybe_unused]] ur_context_handle_t hContext, ur_device_handle_t hDevice, + ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_queue_properties_t *, ur_queue_handle_t *phQueue) { assert(hContext->Device == hDevice); @@ -31,6 +31,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( } Queue->OffloadDevice = hDevice->OffloadDevice; + Queue->UrContext = hContext; *phQueue = Queue; diff --git a/unified-runtime/source/adapters/offload/queue.hpp b/unified-runtime/source/adapters/offload/queue.hpp index 6afe4bf15098..e9c642f52833 100644 --- a/unified-runtime/source/adapters/offload/queue.hpp +++ b/unified-runtime/source/adapters/offload/queue.hpp @@ -18,4 +18,5 @@ struct ur_queue_handle_t_ : RefCounted { ol_queue_handle_t OffloadQueue; ol_device_handle_t OffloadDevice; + ur_context_handle_t UrContext; }; diff --git a/unified-runtime/test/conformance/event/urEventGetProfilingInfo.cpp b/unified-runtime/test/conformance/event/urEventGetProfilingInfo.cpp index e4f3d1f894ee..87b02ca9afcd 100644 --- a/unified-runtime/test/conformance/event/urEventGetProfilingInfo.cpp +++ b/unified-runtime/test/conformance/event/urEventGetProfilingInfo.cpp @@ -5,6 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "fixtures.h" +#include "uur/checks.h" #include "uur/known_failure.h" using urEventGetProfilingInfoTest = uur::event::urEventTest; @@ -179,8 +180,9 @@ TEST_P(urEventGetProfilingInfoTest, InvalidValue) { const ur_profiling_info_t property_name = UR_PROFILING_INFO_COMMAND_QUEUED; size_t property_size = 0; - ASSERT_SUCCESS(urEventGetProfilingInfo(event, property_name, 0, nullptr, - &property_size)); + ASSERT_SUCCESS_OR_OPTIONAL_QUERY( + urEventGetProfilingInfo(event, property_name, 0, nullptr, &property_size), + property_name); ASSERT_NE(property_size, 0); uint64_t property_value = 0; @@ -221,8 +223,10 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE(urEventGetProfilingInfoForWaitWithBarrier); TEST_P(urEventGetProfilingInfoForWaitWithBarrier, Success) { uint64_t submit_value = 0; - ASSERT_SUCCESS(urEventGetProfilingInfo(event, UR_PROFILING_INFO_COMMAND_START, - size, &submit_value, nullptr)); + ASSERT_SUCCESS_OR_OPTIONAL_QUERY( + urEventGetProfilingInfo(event, UR_PROFILING_INFO_COMMAND_START, size, + &submit_value, nullptr), + UR_PROFILING_INFO_COMMAND_START); ASSERT_NE(submit_value, 0); uint64_t complete_value = 0; From 5c909740853573dec7e32162e6bfaddf3da0d36a Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Mon, 21 Jul 2025 09:53:21 +0100 Subject: [PATCH 2/3] clang-format --- unified-runtime/source/adapters/offload/event.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/unified-runtime/source/adapters/offload/event.hpp b/unified-runtime/source/adapters/offload/event.hpp index 84f7b02b647c..42860e68bed3 100644 --- a/unified-runtime/source/adapters/offload/event.hpp +++ b/unified-runtime/source/adapters/offload/event.hpp @@ -20,9 +20,11 @@ struct ur_event_handle_t_ : RefCounted { ur_command_t Type; ur_queue_handle_t UrQueue; - ur_event_handle_t_(ur_command_t Type, ur_queue_handle_t Queue) : Type(Type), UrQueue(Queue) {} + ur_event_handle_t_(ur_command_t Type, ur_queue_handle_t Queue) + : Type(Type), UrQueue(Queue) {} - static ur_event_handle_t createEmptyEvent(ur_command_t Type, ur_queue_handle_t Queue) { + static ur_event_handle_t createEmptyEvent(ur_command_t Type, + ur_queue_handle_t Queue) { auto *Event = new ur_event_handle_t_(Type, Queue); // Null event represents an empty event. Waiting on it is a no-op. Event->OffloadEvent = nullptr; From be1820f82726e99411ea5cfc40f74762d1c3a2e1 Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Mon, 21 Jul 2025 10:17:11 +0100 Subject: [PATCH 3/3] More clang-format --- unified-runtime/source/adapters/offload/queue.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/unified-runtime/source/adapters/offload/queue.cpp b/unified-runtime/source/adapters/offload/queue.cpp index d821b46b5774..b2e28ddd7042 100644 --- a/unified-runtime/source/adapters/offload/queue.cpp +++ b/unified-runtime/source/adapters/offload/queue.cpp @@ -17,9 +17,10 @@ #include "queue.hpp" #include "ur2offload.hpp" -UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_queue_properties_t *, ur_queue_handle_t *phQueue) { +UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate(ur_context_handle_t hContext, + ur_device_handle_t hDevice, + const ur_queue_properties_t *, + ur_queue_handle_t *phQueue) { assert(hContext->Device == hDevice);