From 68407dfe781f5bed060b8fb178d145bdb9be3824 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Thu, 2 Apr 2026 07:10:22 +0200 Subject: [PATCH 1/5] [XPTI] Add xptiSubscriberInit and xptiSubscriberFinish callback APIs Implement optional subscriber lifecycle callbacks that are invoked once per subscriber instance (when loaded/unloaded), distinct from the per-stream xptiTraceInit/xptiTraceFinish callbacks. Changes: - Add xpti_subscriber_handle_t opaque handle type - Add xptiSubscriberInit(handle) called when subscriber is loaded - Add xptiSubscriberFinish(handle) called when subscriber is unloaded - Update framework to invoke callbacks if present - Add example implementations to basic_collector - Update documentation with usage information These callbacks are optional and backward compatible with existing subscribers that do not implement them. --- xpti/include/xpti/xpti_data_types.h | 70 +++++++++++++++++++ xptifw/doc/XPTI_Framework.md | 44 ++++++++++-- .../basic_collector/basic_collector.cpp | 16 +++++ xptifw/src/xpti_trace_framework.cpp | 43 +++++++++++- 4 files changed, 165 insertions(+), 8 deletions(-) diff --git a/xpti/include/xpti/xpti_data_types.h b/xpti/include/xpti/xpti_data_types.h index 0f9e4f2811829..f93f4a9220c15 100644 --- a/xpti/include/xpti/xpti_data_types.h +++ b/xpti/include/xpti/xpti_data_types.h @@ -864,6 +864,19 @@ enum class metadata_type_t { boolean = 5 }; +/// @typedef subscriber_handle_t +/// @brief Opaque handle type for identifying a subscriber instance. +/// +/// Each subscriber loaded by the XPTI framework is assigned a unique handle +/// that remains valid throughout the subscriber's lifetime (from +/// xptiSubscriberInit to xptiSubscriberFinish). The handle is used to identify +/// the subscriber in XPTI API calls that require subscriber-specific context. +/// +/// This is an opaque type implemented as a unique 64-bit identifier. Subscribers +/// should treat this as an opaque value and not make assumptions about its +/// internal representation. +using subscriber_handle_t = uint64_t; + /// @struct reserved_data_t /// @brief Holds additional data associated with a trace event. /// @@ -1246,6 +1259,27 @@ typedef void (*plugin_init_t)(unsigned int, unsigned int, const char *, const char *); typedef void (*plugin_fini_t)(const char *); +/// @typedef subscriber_init_t +/// @brief Function pointer type for subscriber initialization callback. +/// +/// This callback is invoked once when a subscriber is loaded and initialized +/// by the XPTI framework. The subscriber receives an opaque handle that must +/// be used in subsequent XPTI API calls requiring subscriber context. +/// +/// @param self The opaque subscriber handle for this subscriber instance. +typedef void (*subscriber_init_t)(xpti::subscriber_handle_t self); + +/// @typedef subscriber_fini_t +/// @brief Function pointer type for subscriber finalization callback. +/// +/// This callback is invoked once when a subscriber is being unloaded by the +/// XPTI framework. It is called after all xptiTraceFinish callbacks and marks +/// the end of the subscriber's lifetime. Subscribers should perform final +/// cleanup operations here. +/// +/// @param self The opaque subscriber handle for this subscriber instance. +typedef void (*subscriber_fini_t)(xpti::subscriber_handle_t self); + constexpr uint16_t trace_task_begin = static_cast(xpti::trace_point_type_t::task_begin); constexpr uint16_t trace_task_end = @@ -1445,4 +1479,40 @@ XPTI_CALLBACK_API void xptiTraceInit(unsigned int major_version, /// subscribed to this stream can now free up all internal data structures and /// memory that has been allocated to manage the stream data. XPTI_CALLBACK_API void xptiTraceFinish(const char *stream_name); + +/// @brief Subscriber initialization callback +/// @details This function is called by the XPTI framework once when a subscriber +/// is loaded and initialized. This provides the subscriber with its opaque handle, +/// which must be used in subsequent XPTI API calls that require subscriber-specific +/// context. +/// +/// This callback is invoked before any xptiTraceInit() callbacks for streams. +/// Subscribers should perform one-time initialization operations here, such as +/// allocating global data structures or initializing logging systems. +/// +/// @param [in] self The opaque subscriber handle for this subscriber instance. +/// This handle remains valid throughout the subscriber's lifetime +/// (from xptiSubscriberInit to xptiSubscriberFinish) and must be +/// used in XPTI API calls that require subscriber context. +/// +/// @note Subscribers are not required to implement this callback. If not implemented, +/// the framework will continue loading the subscriber normally. +XPTI_CALLBACK_API void xptiSubscriberInit(xpti::subscriber_handle_t self); + +/// @brief Subscriber finalization callback +/// @details This function is called by the XPTI framework once when a subscriber +/// is being unloaded. This callback is invoked after all xptiTraceFinish() callbacks +/// for all streams have been called, marking the end of the subscriber's lifetime. +/// +/// Subscribers should perform final cleanup operations here, such as: +/// - Freeing global data structures allocated in xptiSubscriberInit +/// - Flushing buffered output +/// - Closing files or network connections +/// - Releasing any other resources held by the subscriber +/// +/// @param [in] self The opaque subscriber handle for this subscriber instance. +/// +/// @note Subscribers are not required to implement this callback. If not implemented, +/// the framework will continue unloading the subscriber normally. +XPTI_CALLBACK_API void xptiSubscriberFinish(xpti::subscriber_handle_t self); } diff --git a/xptifw/doc/XPTI_Framework.md b/xptifw/doc/XPTI_Framework.md index 0bee8d33cf37e..9a98c0f4689a6 100644 --- a/xptifw/doc/XPTI_Framework.md +++ b/xptifw/doc/XPTI_Framework.md @@ -114,12 +114,17 @@ ownership of the subscriber is controlled by tools or applications that consume the generated event streams and **must** follow the protocol or handshake defined for an event stream. -There are three important things that a subscriber must implement to be -functional: (1) `xptiTraceInit`, (2) `xptiTraceFinish` and (3) callback -handlers. The `xptiTraceInit` and `xptiTraceFinish` API calls are used by the -dispatcher loading the subscriber dynamically to determine if the subscriber -is a valid subscriber. If these entry points are not present, then the -subscriber is not loaded. +A subscriber must implement at minimum two required entry points to be +considered valid: (1) `xptiTraceInit` and (2) `xptiTraceFinish`. The +dispatcher uses these API calls to validate the subscriber. If these entry +points are not present, then the subscriber is not loaded. + +Additionally, subscribers may optionally implement two lifecycle callbacks: +(3) `xptiSubscriberInit` and (4) `xptiSubscriberFinish`. These callbacks +provide subscriber-level initialization and finalization, as opposed to the +per-stream initialization provided by `xptiTraceInit` and `xptiTraceFinish`. +Finally, subscribers must implement (5) callback handlers for the trace +events they wish to receive. The `xptiTraceInit` callback is called by the dispatcher when the generator of a new stream of data makes a call to `xptiInitialize` for the new stream. The @@ -173,6 +178,31 @@ allocated to handle the stream. The `xptiTraceFinish` call is made by the dispatcher when the instrumented code is winding down a data stream by calling `xptiFinalize` for the stream. +In addition to the per-stream callbacks, subscribers may optionally implement +subscriber-level lifecycle callbacks: + +```cpp +XPTI_CALLBACK_API void xptiSubscriberInit(xpti::subscriber_handle_t self) { + // Called once when the subscriber is loaded and initialized + // Perform one-time initialization here (e.g., allocate global resources) + // The 'self' handle can be used for subscriber-specific XPTI API calls +} + +XPTI_CALLBACK_API void xptiSubscriberFinish(xpti::subscriber_handle_t self) { + // Called once when the subscriber is being unloaded + // This is called after all xptiTraceFinish callbacks + // Perform final cleanup here (e.g., free global resources, flush buffers) +} +``` + +The `xptiSubscriberInit` callback is invoked once when the subscriber is +loaded, before any `xptiTraceInit` calls. It provides the subscriber with an +opaque handle that can be used in subsequent XPTI API calls requiring +subscriber-specific context. The `xptiSubscriberFinish` callback is invoked +once when the subscriber is being unloaded, after all `xptiTraceFinish` calls +have completed. These callbacks are optional; if not implemented, the framework +will continue loading/unloading the subscriber normally. + The implementation of the callbacks is where attention needs to be given to the handshake protocol or specification for a given stream the subscriber wants to attach to and consume the data. The instrumented library may send additional @@ -217,6 +247,8 @@ invariant across all instances of that tracepoint. > **NOTE:** A subscriber **must** implement the `xptiTraceInit` and > `xptiTraceFinish` APIs for the dispatcher to successfully load the subscriber. +> Optionally, a subscriber may implement `xptiSubscriberInit` and +> `xptiSubscriberFinish` for subscriber-level lifecycle management. > **NOTE:** The specification for a given event stream **must** be consulted > before implementing the callback handlers for various trace types. diff --git a/xptifw/samples/basic_collector/basic_collector.cpp b/xptifw/samples/basic_collector/basic_collector.cpp index b325a48b3eb96..235168b300919 100644 --- a/xptifw/samples/basic_collector/basic_collector.cpp +++ b/xptifw/samples/basic_collector/basic_collector.cpp @@ -18,6 +18,7 @@ #include static uint8_t GStreamID = 0; +static xpti::subscriber_handle_t GSubscriberHandle = 0; std::mutex GIOMutex; // The lone callback function we are going to use to demonstrate how to attach @@ -111,6 +112,21 @@ XPTI_CALLBACK_API void xptiTraceFinish(const char *stream_name) { // We do nothing here } +XPTI_CALLBACK_API void xptiSubscriberInit(xpti::subscriber_handle_t self) { + // Called once when the subscriber is loaded + // This is where you would initialize global resources, logging, etc. + printf("Subscriber initialized with handle: %lu\n", self); + + // Store the subscriber handle for use in xptiTraceInit + GSubscriberHandle = self; +} + +XPTI_CALLBACK_API void xptiSubscriberFinish(xpti::subscriber_handle_t self) { + // Called once when the subscriber is being unloaded + // This is where you would clean up global resources + printf("Subscriber finalized with handle: %lu\n", self); +} + XPTI_CALLBACK_API void tpCallback(uint16_t TraceType, xpti::trace_event_data_t *Parent, xpti::trace_event_data_t *Event, diff --git a/xptifw/src/xpti_trace_framework.cpp b/xptifw/src/xpti_trace_framework.cpp index 35c3e0c15d84b..fc24bc2dd2e65 100644 --- a/xptifw/src/xpti_trace_framework.cpp +++ b/xptifw/src/xpti_trace_framework.cpp @@ -388,10 +388,16 @@ class Subscribers { struct plugin_data_t { /// The handle of the loaded shared object xpti_plugin_handle_t handle = nullptr; + /// Unique subscriber ID for this plugin + xpti::subscriber_handle_t subscriber_id = 0; /// The initialization entry point xpti::plugin_init_t init = nullptr; /// The finalization entry point xpti::plugin_fini_t fini = nullptr; + /// The subscriber initialization entry point (optional) + xpti::subscriber_init_t subscriber_init = nullptr; + /// The subscriber finalization entry point (optional) + xpti::subscriber_fini_t subscriber_fini = nullptr; /// The name of the shared object (in UTF8?)) std::string name; /// indicates whether the data structure is valid @@ -399,9 +405,10 @@ class Subscribers { }; // Data structures defined to hold the plugin data that can be looked up by - // plugin name or the handle + // plugin name, library handle, or subscriber ID using plugin_handle_lut_t = std::map; using plugin_name_lut_t = std::map; + using subscriber_id_lut_t = std::map; // We unload all loaded shared objects in the destructor; Must not be invoked // in the DLLMain() function and possibly the __fini() function in Linux @@ -463,17 +470,32 @@ class Subscribers { g_helper.findFunction(Handle, "xptiTraceFinish")); if (InitFunc && FiniFunc) { // We appear to have loaded a valid plugin, so we will insert the - // plugin information into the two maps guarded by a lock + // plugin information into the three maps guarded by a lock plugin_data_t Data; Data.valid = true; Data.handle = Handle; Data.name = Path; Data.init = InitFunc; Data.fini = FiniFunc; + // Generate unique subscriber ID + Data.subscriber_id = MNextSubscriberID.fetch_add(1, std::memory_order_relaxed); + + // Look for optional subscriber lifecycle callbacks + Data.subscriber_init = reinterpret_cast( + g_helper.findFunction(Handle, "xptiSubscriberInit")); + Data.subscriber_fini = reinterpret_cast( + g_helper.findFunction(Handle, "xptiSubscriberFinish")); + { std::lock_guard Lock(MMutex); MNameLUT[Path] = Data; MHandleLUT[Handle] = Data; + MSubscriberIDLUT[Data.subscriber_id] = Data; + } + + // Call subscriber initialization callback if present + if (Data.subscriber_init) { + Data.subscriber_init(Data.subscriber_id); } } else { // We may have loaded another shared object that is not a tool plugin @@ -578,13 +600,26 @@ class Subscribers { } } + /// Checks if a subscriber ID is valid. + /// @param subscriber_id The subscriber ID to validate. + /// @return true if the subscriber ID exists, false otherwise. + bool isValidSubscriberID(xpti::subscriber_handle_t subscriber_id) { + std::lock_guard Lock(MMutex); + return MSubscriberIDLUT.find(subscriber_id) != MSubscriberIDLUT.end(); + } + /// Unloads all loaded plugins. void unloadAllPlugins() { for (auto &Item : MNameLUT) { + // Call subscriber finalization callback if present + if (Item.second.subscriber_fini) { + Item.second.subscriber_fini(Item.second.subscriber_id); + } unloadPlugin(Item.second.handle); } MHandleLUT.clear(); MNameLUT.clear(); + MSubscriberIDLUT.clear(); } private: @@ -592,6 +627,10 @@ class Subscribers { plugin_name_lut_t MNameLUT; /// Hash map that maps shared object handle to the plugin data plugin_handle_lut_t MHandleLUT; + /// Hash map that maps subscriber ID to the plugin data + subscriber_id_lut_t MSubscriberIDLUT; + /// Counter for generating unique subscriber IDs + std::atomic MNextSubscriberID{1}; /// Lock to ensure the operation on these maps are safe std::mutex MMutex; /// Mutex to ensure that plugin loading is thread-safe. From 0e4464e592d55b0b1d00efde25377bbc5fff5979 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Thu, 2 Apr 2026 08:05:40 +0200 Subject: [PATCH 2/5] [XPTI] Add per-subscriber stream detail level control This commit introduces two new APIs that allow subscribers to request different levels of optional data emission from producers on a per-stream basis: - xptiSetSubscriberStreamDetailLevel: Subscribers set their desired detail level for a specific stream - xptiGetEffectiveStreamDetailLevel: Producers query the effective detail level to determine what optional data to emit Key features: - Four ordered detail levels: NONE, BASIC, NORMAL (default), VERBOSE - Aggregation rule: effective level is the maximum across all subscribers - Lock-free atomic reads for the get API (optimized for hot path) - Recalculation only on set (cold path during initialization) - Backward compatible: defaults to NORMAL level Implementation details: - Uses fixed-size atomic array indexed by stream_id for O(1) lock-free reads - Effective levels cached per stream and updated only when subscribers change their requests - Comprehensive documentation in XPTI_Framework.md with usage examples --- xpti/include/xpti/xpti_data_types.h | 33 ++++ xpti/include/xpti/xpti_trace_framework.h | 86 ++++++++++ xptifw/doc/XPTI_Framework.md | 168 +++++++++++++++++++ xptifw/src/xpti_trace_framework.cpp | 203 ++++++++++++++++++++++- 4 files changed, 488 insertions(+), 2 deletions(-) diff --git a/xpti/include/xpti/xpti_data_types.h b/xpti/include/xpti/xpti_data_types.h index f93f4a9220c15..edb8e942e0258 100644 --- a/xpti/include/xpti/xpti_data_types.h +++ b/xpti/include/xpti/xpti_data_types.h @@ -352,6 +352,39 @@ enum class payload_flag_t { using stream_id_t = uint8_t; +/// @enum stream_detail_level_t +/// @brief Defines the detail level for stream data emission. +/// +/// This enum specifies the level of detail that should be emitted on a stream. +/// Subscribers can request a specific detail level for each stream they +/// subscribe to, and the framework will compute the effective detail level as +/// the maximum across all subscribers for that stream. +/// +/// The values are ordered to allow threshold checks: +/// producers can use comparisons like `if (level >= XPTI_STREAM_DETAIL_LEVEL_NORMAL)` +/// to determine what data to emit. +/// +/// @var stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NONE +/// No optional data should be emitted. Only essential trace points are reported. +/// +/// @var stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_BASIC +/// Emit basic optional data. Suitable for lightweight profiling with minimal overhead. +/// +/// @var stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL +/// Emit normal level of optional data. This is the default level providing a +/// balance between detail and overhead. +/// +/// @var stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_VERBOSE +/// Emit verbose optional data. Provides maximum detail for deep analysis, +/// potentially with higher overhead. +/// +enum class stream_detail_level_t : uint8_t { + XPTI_STREAM_DETAIL_LEVEL_NONE = 0, + XPTI_STREAM_DETAIL_LEVEL_BASIC = 1, + XPTI_STREAM_DETAIL_LEVEL_NORMAL = 2, + XPTI_STREAM_DETAIL_LEVEL_VERBOSE = 3 +}; + // // Helper macros for creating new tracepoint and // event types diff --git a/xpti/include/xpti/xpti_trace_framework.h b/xpti/include/xpti/xpti_trace_framework.h index 99ee1a5fb2f16..281c6094e7380 100644 --- a/xpti/include/xpti/xpti_trace_framework.h +++ b/xpti/include/xpti/xpti_trace_framework.h @@ -1003,6 +1003,88 @@ XPTI_EXPORT_API bool xptiCheckTracepointScopeNotification(); /// @param e The event for which associated data will be removed XPTI_EXPORT_API void xptiReleaseEvent(xpti::trace_event_data_t *e); +/// @brief Sets the stream detail level for a specific subscriber. +/// +/// This function allows a subscriber to request a specific detail level for +/// data emission on a given stream. The effective detail level for the stream +/// will be the maximum across all subscribers for that stream. This enables +/// subscribers to control the amount of optional data emitted by producers +/// without affecting other subscribers' needs. +/// +/// The aggregation rule ensures that one subscriber cannot reduce the emitted +/// data if another subscriber still needs more detail. This prevents +/// information loss when multiple subscribers with different detail level +/// requirements are active on the same stream. +/// +/// @param subscriber The opaque subscriber handle for the subscriber requesting +/// the detail level. This handle is provided to the subscriber +/// during its initialization via xptiSubscriberInit(). +/// @param stream The stream ID for which the detail level is being set. +/// @param level The requested detail level from xpti::stream_detail_level_t. +/// Valid values are: +/// - XPTI_STREAM_DETAIL_LEVEL_NONE: No optional data +/// - XPTI_STREAM_DETAIL_LEVEL_BASIC: Basic optional data +/// - XPTI_STREAM_DETAIL_LEVEL_NORMAL: Normal detail (default) +/// - XPTI_STREAM_DETAIL_LEVEL_VERBOSE: Maximum detail +/// +/// @return Returns `xpti::result_t::XPTI_RESULT_SUCCESS` if the detail level +/// was successfully set. Returns `xpti::result_t::XPTI_RESULT_INVALIDARG` +/// if the subscriber ID is invalid. +/// +/// @note This API is intended to be called by subscribers, typically during +/// their initialization (xptiSubscriberInit) or stream initialization +/// (xptiTraceInit) callbacks. +/// +/// @see xptiGetEffectiveStreamDetailLevel +/// @see xpti::stream_detail_level_t +/// +XPTI_EXPORT_API xpti::result_t xptiSetSubscriberStreamDetailLevel( + xpti::subscriber_handle_t subscriber, xpti::stream_id_t stream, + xpti::stream_detail_level_t level); + +/// @brief Gets the effective stream detail level for a stream. +/// +/// This function returns the effective detail level for a given stream, which +/// is computed as the maximum requested detail level across all subscribers +/// for that stream. Producers can use this API to determine what level of +/// optional data they should emit on the stream. +/// +/// The effective level implements an aggregation rule: if any subscriber needs +/// a higher detail level, that level becomes effective for the entire stream. +/// This ensures that all subscribers receive the data they need, though it may +/// mean some subscribers receive more data than they requested. +/// +/// Producers should call this function when determining what optional data to +/// emit on a stream. The enum values are ordered to support threshold checks: +/// +/// @code +/// auto level = xptiGetEffectiveStreamDetailLevel(stream_id); +/// if (level >= xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { +/// // Emit normal-level optional data +/// } +/// if (level >= xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_VERBOSE) { +/// // Emit verbose-level optional data +/// } +/// @endcode +/// +/// @param stream The stream ID for which the effective detail level is requested. +/// +/// @return The effective detail level for the stream. If no subscriber has +/// explicitly set a level for the stream, returns +/// `xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL` as +/// the default, ensuring backward compatibility with existing behavior. +/// +/// @note This API is lock-free and uses atomics for very fast reads. The +/// effective level is cached per stream and only recalculated when +/// subscribers change their detail level requests. This makes it suitable +/// for use in hot paths where producers frequently emit trace data. +/// +/// @see xptiSetSubscriberStreamDetailLevel +/// @see xpti::stream_detail_level_t +/// +XPTI_EXPORT_API xpti::stream_detail_level_t +xptiGetEffectiveStreamDetailLevel(xpti::stream_id_t stream); + typedef xpti::result_t (*xpti_framework_initialize_t)(); typedef xpti::result_t (*xpti_framework_finalize_t)(); typedef xpti::result_t (*xpti_initialize_t)(const char *, uint32_t, uint32_t, @@ -1072,4 +1154,8 @@ typedef xpti_tracepoint_t *(*xpti_create_tracepoint_t)(const char *, const char *, uint32_t, uint32_t, void *); typedef xpti::result_t (*xpti_delete_tracepoint_t)(xpti_tracepoint_t *); +typedef xpti::result_t (*xpti_set_subscriber_stream_detail_level_t)( + xpti::subscriber_handle_t, xpti::stream_id_t, xpti::stream_detail_level_t); +typedef xpti::stream_detail_level_t (*xpti_get_effective_stream_detail_level_t)( + xpti::stream_id_t); } diff --git a/xptifw/doc/XPTI_Framework.md b/xptifw/doc/XPTI_Framework.md index 9a98c0f4689a6..fdeb9129ad590 100644 --- a/xptifw/doc/XPTI_Framework.md +++ b/xptifw/doc/XPTI_Framework.md @@ -20,6 +20,13 @@ - [`xptiMakeEvent`](#xptimakeevent) - [Notifying the registered listeners](#notifying-the-registered-listeners) - [`xptiNotifySubscribers`](#xptinotifysubscribers) + - [Stream Detail Level Control](#stream-detail-level-control) + - [Overview](#overview-1) + - [Detail Level Enum](#detail-level-enum) + - [Aggregation Rule](#aggregation-rule) + - [Subscriber Usage](#subscriber-usage) + - [Producer Usage](#producer-usage) + - [API Reference](#api-reference) - [Performance of the Framework](#performance-of-the-framework) - [Modeling and projection](#modeling-and-projection) - [Computing the cost incurred in the framework](#computing-the-cost-incurred-in-the-framework) @@ -731,6 +738,167 @@ void function1() { } ``` +### Stream Detail Level Control + +Starting from version 1.1.0, the XPTI framework supports per-subscriber stream +detail level control. This feature allows subscribers to request different +levels of optional data emission from producers on a per-stream basis, enabling +fine-grained control over tracing overhead without sacrificing functionality for +subscribers that need more detailed information. + +#### Overview + +Stream detail level control is designed to address scenarios where: +- Multiple subscribers consume data from the same stream but have different detail requirements +- Producers emit optional data that may be expensive to generate +- Users want to minimize tracing overhead for certain subscribers without affecting others + +The framework provides two new APIs for this feature: +- `xptiSetSubscriberStreamDetailLevel`: Allows a subscriber to request a specific detail level for a stream +- `xptiGetEffectiveStreamDetailLevel`: Allows producers to query the effective detail level for a stream + +#### Detail Level Enum + +The `xpti::stream_detail_level_t` enum defines four ordered levels: + +```cpp +enum class stream_detail_level_t : uint8_t { + XPTI_STREAM_DETAIL_LEVEL_NONE = 0, // No optional data + XPTI_STREAM_DETAIL_LEVEL_BASIC = 1, // Basic optional data + XPTI_STREAM_DETAIL_LEVEL_NORMAL = 2, // Normal detail (default) + XPTI_STREAM_DETAIL_LEVEL_VERBOSE = 3 // Maximum detail +}; +``` + +The values are ordered to support threshold checks in producer code: + +```cpp +auto level = xptiGetEffectiveStreamDetailLevel(stream_id); +if (level >= xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + // Emit normal-level optional data +} +if (level >= xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_VERBOSE) { + // Emit verbose-level optional data +} +``` + +#### Aggregation Rule + +The effective detail level for a stream is computed as the **maximum** requested +level across all subscribers for that stream. This aggregation rule ensures that: +- One subscriber cannot reduce the detail level if another subscriber still needs more information +- All subscribers receive at least the data they requested (though some may receive more) +- Backward compatibility is maintained (default is NORMAL) + +For example: +- If Subscriber A requests BASIC and Subscriber B requests VERBOSE, the effective level is VERBOSE +- If no subscriber sets a level explicitly, the default NORMAL level is used +- If Subscriber A unsubscribes, the effective level becomes the maximum of remaining subscribers + +#### Subscriber Usage + +Subscribers should request their desired detail level during initialization: + +```cpp +XPTI_CALLBACK_API void xptiSubscriberInit(xpti::subscriber_handle_t self) { + // Store the subscriber handle for later use + g_subscriber_handle = self; +} + +XPTI_CALLBACK_API void xptiTraceInit(unsigned int major_version, + unsigned int minor_version, + const char *version_str, + const char *stream_name) { + if (std::string("sycl") == stream_name) { + xpti::stream_id_t stream_id = xptiRegisterStream(stream_name); + + // Request basic detail level for this subscriber on the SYCL stream + xptiSetSubscriberStreamDetailLevel( + g_subscriber_handle, + stream_id, + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_BASIC); + + // Register callbacks as usual + xptiRegisterCallback(stream_id, + (uint16_t)xpti::trace_point_type_t::task_begin, + my_callback); + } +} +``` + +#### Producer Usage + +Producers should query the effective detail level before emitting optional data: + +```cpp +void emit_trace_data(xpti::stream_id_t stream_id, const TraceData& data) { + // Always emit essential trace points + xptiNotifySubscribers(stream_id, trace_type, parent, event, instance, &data); + + // Check if we should emit optional metadata + auto level = xptiGetEffectiveStreamDetailLevel(stream_id); + + if (level >= xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + // Emit normal-level optional metadata + xpti::object_id_t value_id = xptiRegisterObject(&data.optional_info, + sizeof(data.optional_info), + 0); + xptiAddMetadata(event, "optional_info", value_id); + } + + if (level >= xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_VERBOSE) { + // Emit verbose-level optional metadata (potentially expensive) + compute_and_emit_detailed_analysis(event); + } +} +``` + +#### API Reference + +##### `xptiSetSubscriberStreamDetailLevel` + +```cpp +xpti::result_t xptiSetSubscriberStreamDetailLevel( + xpti::subscriber_handle_t subscriber, + xpti::stream_id_t stream, + xpti::stream_detail_level_t level); +``` + +Sets the stream detail level for a specific subscriber on a stream. + +**Parameters:** +- `subscriber`: The opaque subscriber handle (provided via `xptiSubscriberInit`) +- `stream`: The stream ID for which the detail level is being set +- `level`: The requested detail level + +**Returns:** +- `XPTI_RESULT_SUCCESS` if the detail level was successfully set +- `XPTI_RESULT_INVALIDARG` if the subscriber ID is invalid + +**Note:** This API should typically be called during subscriber initialization +(`xptiSubscriberInit`) or stream initialization (`xptiTraceInit`). + +##### `xptiGetEffectiveStreamDetailLevel` + +```cpp +xpti::stream_detail_level_t xptiGetEffectiveStreamDetailLevel( + xpti::stream_id_t stream); +``` + +Gets the effective detail level for a stream (maximum across all subscribers). + +**Parameters:** +- `stream`: The stream ID for which the effective detail level is requested + +**Returns:** +- The effective detail level for the stream +- If no subscriber has set a level, returns `XPTI_STREAM_DETAIL_LEVEL_NORMAL` (default) + +**Performance Note:** This API is lock-free and uses atomics for very fast reads. +The effective level is cached per stream and only recalculated when subscribers +change their detail level requests. This makes it suitable for use in hot paths +where producers emit trace data. + ## Performance of the Framework In order to estimate the overheads one could experience by using the framework, diff --git a/xptifw/src/xpti_trace_framework.cpp b/xptifw/src/xpti_trace_framework.cpp index fc24bc2dd2e65..89682673d34eb 100644 --- a/xptifw/src/xpti_trace_framework.cpp +++ b/xptifw/src/xpti_trace_framework.cpp @@ -1559,7 +1559,15 @@ class Notifications { using stream_flags_t = emhash7::HashMap; Notifications(size_t size = 512) - : MCallbacksByStream(size), MStreamFlags(size) {} + : MCallbacksByStream(size), MStreamFlags(size) { + // Initialize all effective stream detail levels to default (NORMAL) + for (size_t i = 0; i < 256; ++i) { + MEffectiveStreamDetailLevels[i].store( + static_cast( + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL), + std::memory_order_relaxed); + } + } /// @brief Registers a callback function for a specific trace type and stream /// ID. @@ -1904,7 +1912,78 @@ class Notifications { #endif } - void clear() { MCallbacksByStream.clear(); } + void clear() { + MCallbacksByStream.clear(); + std::lock_guard Lock(MDetailLevelLock); + MStreamDetailLevels.clear(); + // Reset all effective levels to default + for (size_t i = 0; i < 256; ++i) { + MEffectiveStreamDetailLevels[i].store( + static_cast( + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL), + std::memory_order_relaxed); + } + } + + /// @brief Sets the stream detail level for a specific subscriber on a stream. + /// + /// This function allows a subscriber to request a specific detail level for + /// data emission on a given stream. The effective detail level for the stream + /// will be the maximum across all subscribers. + /// + /// @param subscriber The subscriber handle requesting the detail level. + /// @param stream The stream ID for which the detail level is being set. + /// @param level The requested detail level. + /// + /// @return Returns `xpti::result_t::XPTI_RESULT_SUCCESS` if the detail level + /// was successfully set. + xpti::result_t setSubscriberStreamDetailLevel( + xpti::subscriber_handle_t subscriber, xpti::stream_id_t stream, + xpti::stream_detail_level_t level) { + std::lock_guard Lock(MDetailLevelLock); + + // Update the subscriber's requested level for this stream + MStreamDetailLevels[subscriber][stream] = level; + + // Recalculate the effective level for this stream (max across all subscribers) + uint8_t max_level = static_cast( + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL); + + for (const auto &subscriber_entry : MStreamDetailLevels) { + const auto &stream_levels = subscriber_entry.second; + auto it = stream_levels.find(stream); + if (it != stream_levels.end()) { + uint8_t requested_level = static_cast(it->second); + if (requested_level > max_level) { + max_level = requested_level; + } + } + } + + // Update the cached effective level atomically + MEffectiveStreamDetailLevels[stream].store(max_level, std::memory_order_release); + + return xpti::result_t::XPTI_RESULT_SUCCESS; + } + + /// @brief Gets the effective stream detail level for a stream. + /// + /// The effective detail level is the maximum requested level across all + /// subscribers for the given stream. If no subscriber has set a level for + /// the stream, the default (NORMAL) is returned. + /// + /// This is a fast lock-free operation using atomics. + /// + /// @param stream The stream ID for which the effective detail level is + /// requested. + /// + /// @return The effective detail level for the stream. + xpti::stream_detail_level_t + getEffectiveStreamDetailLevel(xpti::stream_id_t stream) { + // Fast lock-free read from the cached effective level array + uint8_t level = MEffectiveStreamDetailLevels[stream].load(std::memory_order_acquire); + return static_cast(level); + } private: #ifdef XPTI_STATISTICS @@ -1962,6 +2041,28 @@ class Notifications { mutable xpti::SharedSpinLock MFlagsLock; statistics_t MStats; stream_flags_t MStreamFlags; + + /// @typedef stream_detail_levels_t + /// @brief Maps stream IDs to their requested detail levels. + using stream_detail_levels_t = + phmap::flat_hash_map; + + /// @typedef subscriber_detail_levels_t + /// @brief Maps subscriber handles to their per-stream detail level requests. + using subscriber_detail_levels_t = + phmap::flat_hash_map; + + /// Map tracking detail level requests per subscriber per stream + /// Used only during setSubscriberStreamDetailLevel to recalculate effective levels + subscriber_detail_levels_t MStreamDetailLevels; + + /// Cached effective detail levels per stream (indexed by stream_id) + /// This is a lock-free array of atomics for fast reads by producers + /// stream_id is uint8_t, so we need 256 entries + std::array, 256> MEffectiveStreamDetailLevels; + + /// Mutex protecting access to detail level data structures during updates + std::mutex MDetailLevelLock; }; /// @class Framework @@ -2513,6 +2614,44 @@ class Framework { bool hasSubscribers() { return MSubscribers.hasValidSubscribers(); } + /// @brief Sets the stream detail level for a specific subscriber. + /// + /// Allows a subscriber to request a specific detail level for data emission + /// on a given stream. The effective detail level for the stream will be the + /// maximum across all subscribers. + /// + /// @param subscriber The subscriber handle requesting the detail level. + /// @param stream The stream ID for which the detail level is being set. + /// @param level The requested detail level. + /// + /// @return Returns `xpti::result_t::XPTI_RESULT_SUCCESS` if the detail level + /// was successfully set, `xpti::result_t::XPTI_RESULT_INVALIDARG` if + /// the subscriber ID is invalid. + xpti::result_t setSubscriberStreamDetailLevel( + xpti::subscriber_handle_t subscriber, xpti::stream_id_t stream, + xpti::stream_detail_level_t level) { + // Validate subscriber ID + if (!MSubscribers.isValidSubscriberID(subscriber)) { + return xpti::result_t::XPTI_RESULT_INVALIDARG; + } + return MNotifier.setSubscriberStreamDetailLevel(subscriber, stream, level); + } + + /// @brief Gets the effective stream detail level for a stream. + /// + /// The effective detail level is the maximum requested level across all + /// subscribers for the given stream. If no subscriber has set a level for + /// the stream, the default (NORMAL) is returned. + /// + /// @param stream The stream ID for which the effective detail level is + /// requested. + /// + /// @return The effective detail level for the stream. + xpti::stream_detail_level_t + getEffectiveStreamDetailLevel(xpti::stream_id_t stream) { + return MNotifier.getEffectiveStreamDetailLevel(stream); + } + xpti::result_t finalizeStream(const char *Stream) { if (!Stream) return xpti::result_t::XPTI_RESULT_INVALIDARG; @@ -3771,6 +3910,66 @@ XPTI_EXPORT_API void xptiReleaseEvent(xpti::trace_event_data_t *Event) { return xpti::Framework::instance().releaseEvent(Event); } +/// @brief Sets the stream detail level for a specific subscriber. +/// +/// This function allows a subscriber to request a specific detail level for +/// data emission on a given stream. The effective detail level for the stream +/// will be the maximum across all subscribers for that stream. This enables +/// subscribers to control the amount of optional data emitted by producers +/// without affecting other subscribers' needs. +/// +/// @param subscriber The opaque subscriber handle for the subscriber requesting +/// the detail level. This handle is provided to the subscriber +/// during its initialization via xptiSubscriberInit(). +/// @param stream The stream ID for which the detail level is being set. +/// @param level The requested detail level from xpti::stream_detail_level_t. +/// +/// @return Returns `xpti::result_t::XPTI_RESULT_SUCCESS` if the detail level +/// was successfully set. Returns `xpti::result_t::XPTI_RESULT_INVALIDARG` +/// if the subscriber ID is invalid. +/// +/// @note The effective detail level for a stream is the maximum across all +/// subscribers. One subscriber cannot reduce the detail level if another +/// subscriber still needs more detail. +/// +XPTI_EXPORT_API xpti::result_t xptiSetSubscriberStreamDetailLevel( + xpti::subscriber_handle_t subscriber, xpti::stream_id_t stream, + xpti::stream_detail_level_t level) { + return xpti::Framework::instance().setSubscriberStreamDetailLevel( + subscriber, stream, level); +} + +/// @brief Gets the effective stream detail level for a stream. +/// +/// This function returns the effective detail level for a given stream, which +/// is computed as the maximum requested detail level across all subscribers +/// for that stream. Producers can use this API to determine what level of +/// optional data they should emit on the stream. +/// +/// The effective level implements an aggregation rule: if any subscriber needs +/// a higher detail level, that level becomes effective for the entire stream. +/// This ensures that all subscribers receive the data they need, though it may +/// mean some subscribers receive more data than they requested. +/// +/// @param stream The stream ID for which the effective detail level is requested. +/// +/// @return The effective detail level for the stream. If no subscriber has +/// explicitly set a level for the stream, returns +/// `xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL` as +/// the default. +/// +/// @note This is a lock-free operation optimized for frequent calls in hot +/// paths. The effective level is cached per stream using atomics and only +/// recalculated when subscribers update their detail level requests. +/// Producers should call this function to determine what optional data +/// to emit. They can use threshold checks like: +/// `if (level >= XPTI_STREAM_DETAIL_LEVEL_NORMAL)` +/// +XPTI_EXPORT_API xpti::stream_detail_level_t +xptiGetEffectiveStreamDetailLevel(xpti::stream_id_t stream) { + return xpti::Framework::instance().getEffectiveStreamDetailLevel(stream); +} + } // extern "C" #if (defined(_WIN32) || defined(_WIN64)) From bd27aec207086da7b9fd6068f31b651c9158bfe2 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Thu, 2 Apr 2026 08:43:50 +0200 Subject: [PATCH 3/5] Define new APIs in proxy --- sycl/source/detail/xpti_registry.hpp | 13 +++++++---- xpti/src/xpti_proxy.cpp | 34 +++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/sycl/source/detail/xpti_registry.hpp b/sycl/source/detail/xpti_registry.hpp index 9598eefab7b27..78b68424820ea 100644 --- a/sycl/source/detail/xpti_registry.hpp +++ b/sycl/source/detail/xpti_registry.hpp @@ -40,10 +40,11 @@ constexpr const char *GVerStr = SYCL_VERSION_STR; /// We define all the streams used the instrumentation framework here inline constexpr const char *SYCL_STREAM_NAME = "sycl"; -// We will use "sycl.debug" stream name as an indicator of needing debugging -// information; in this case, the tool will have to subscribe to the sycl.debug -// stream to get additional debug metadata, but the metadata will still be sent -// through the regular streams. +// The "sycl.debug" stream is maintained for full backward compatibility. +// However, new tools should use xptiSetSubscriberStreamDetailLevel() on the +// "sycl" stream to control metadata detail (BASIC, NORMAL, or VERBOSE) instead +// of subscribing to "sycl.debug". When a subscriber registers for "sycl.debug", +// verbose metadata will be emitted to that stream for backward compatibility. inline constexpr const char *SYCL_DEBUG_STREAM_NAME = "sycl.debug"; inline constexpr auto SYCL_MEM_ALLOC_STREAM_NAME = "sycl.experimental.mem_alloc"; @@ -75,6 +76,10 @@ inline bool isDebugStream(xpti::stream_id_t StreamID) { } inline uint8_t getActiveXPTIStreamID() { + // Return sycl.debug stream if subscribers are listening to it (backward compatibility), + // otherwise return regular sycl stream. + // New subscribers should use xptiSetSubscriberStreamDetailLevel() on the sycl stream + // for detail level control instead of subscribing to sycl.debug. return xptiCheckTraceEnabled(detail::GSYCLDebugStreamID) ? detail::GSYCLDebugStreamID : detail::GSYCLStreamID; diff --git a/xpti/src/xpti_proxy.cpp b/xpti/src/xpti_proxy.cpp index 6400b6824c1a2..441c94c447ef5 100644 --- a/xpti/src/xpti_proxy.cpp +++ b/xpti/src/xpti_proxy.cpp @@ -62,6 +62,8 @@ enum functions_t : unsigned { XPTI_SET_DEFAULT_EVENT_TYPE, XPTI_GET_DEFAULT_TRACE_TYPE, XPTI_SET_DEFAULT_TRACE_TYPE, + XPTI_SET_SUBSCRIBER_STREAM_DETAIL_LEVEL, + XPTI_GET_EFFECTIVE_STREAM_DETAIL_LEVEL, // All additional functions need to appear before // the XPTI_FW_API_COUNT enum XPTI_FW_API_COUNT ///< This enum must always be the last one in the list @@ -119,7 +121,11 @@ class ProxyLoader { {XPTI_SET_DEFAULT_EVENT_TYPE, "xptiSetDefaultEventType"}, {XPTI_GET_DEFAULT_TRACE_TYPE, "xptiGetDefaultTraceType"}, {XPTI_SET_DEFAULT_TRACE_TYPE, "xptiSetDefaultTraceType"}, - {XPTI_RELEASE_EVENT, "xptiReleaseEvent"}}; + {XPTI_RELEASE_EVENT, "xptiReleaseEvent"}, + {XPTI_SET_SUBSCRIBER_STREAM_DETAIL_LEVEL, + "xptiSetSubscriberStreamDetailLevel"}, + {XPTI_GET_EFFECTIVE_STREAM_DETAIL_LEVEL, + "xptiGetEffectiveStreamDetailLevel"}}; public: typedef std::vector dispatch_table_t; @@ -750,3 +756,29 @@ xptiSetDefaultTraceType(xpti::trace_point_type_t trace_type) { } return xpti::result_t::XPTI_RESULT_FAIL; } + +XPTI_EXPORT_API xpti::result_t xptiSetSubscriberStreamDetailLevel( + xpti::subscriber_handle_t subscriber, xpti::stream_id_t stream, + xpti::stream_detail_level_t level) { + if (xpti::ProxyLoader::instance().noErrors()) { + auto f = xpti::ProxyLoader::instance().functionByIndex( + XPTI_SET_SUBSCRIBER_STREAM_DETAIL_LEVEL); + if (f) { + return (*(xpti_set_subscriber_stream_detail_level_t)f)(subscriber, stream, + level); + } + } + return xpti::result_t::XPTI_RESULT_FAIL; +} + +XPTI_EXPORT_API xpti::stream_detail_level_t +xptiGetEffectiveStreamDetailLevel(xpti::stream_id_t stream) { + if (xpti::ProxyLoader::instance().noErrors()) { + auto f = xpti::ProxyLoader::instance().functionByIndex( + XPTI_GET_EFFECTIVE_STREAM_DETAIL_LEVEL); + if (f) { + return (*(xpti_get_effective_stream_detail_level_t)f)(stream); + } + } + return xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL; +} From c6d55c05376d7cfa19a8bd82dc105234b9b3a184 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Thu, 2 Apr 2026 08:45:35 +0200 Subject: [PATCH 4/5] [XPTI] Apply stream detail control levels for sycl stream Apply stream detail level control to SYCL RT while preserving full backward compatibility with existing sycl.debug stream subscribers. Changes: - Use dual-path checks (Level >= VERBOSE || isDebugStream()) for verbose metadata emission in commands.cpp and queue_impl.cpp - This ensures verbose metadata is emitted when either: * A subscriber requests VERBOSE on sycl stream (new approach), OR * A subscriber registers for sycl.debug stream (old approach) - Update documentation to describe detail level control feature - Add metadata table showing which detail level provides each field Backward compatibility: - Existing tools using sycl.debug continue to work unchanged - New tools can request BASIC level for minimal overhead (PTI use case) - Both approaches can coexist during migration period --- .../design/SYCLInstrumentationUsingXPTI.md | 77 ++++++++--- sycl/source/detail/queue_impl.cpp | 19 ++- sycl/source/detail/scheduler/commands.cpp | 129 ++++++++++++------ 3 files changed, 157 insertions(+), 68 deletions(-) diff --git a/sycl/doc/design/SYCLInstrumentationUsingXPTI.md b/sycl/doc/design/SYCLInstrumentationUsingXPTI.md index d0c7c3e59869a..4a17c93ae258f 100644 --- a/sycl/doc/design/SYCLInstrumentationUsingXPTI.md +++ b/sycl/doc/design/SYCLInstrumentationUsingXPTI.md @@ -244,6 +244,37 @@ All trace point types in bold provide semantic information about the graph, node The `"sycl.debug"` stream emits the same notifications as the `"sycl"` stream, but with additional metadata. If toolchains want to keep the overhead low then subscribing to `"sycl"` stream is the right option, if toolchains want to get more data and keeping overheads low is not important then they should subscribe to `"sycl.debug"`. If a tool subscribes to both `"sycl"` and `"sycl.debug"`, only notifications from `"sycl.debug"` will be delivered to avoid duplication. + +### Stream Detail Level Control + +Starting from XPTI v1.1.0, subscribers can control the amount of metadata emitted on the `"sycl"` stream using `xptiSetSubscriberStreamDetailLevel()`. This provides fine-grained control over tracing overhead: + +- **BASIC**: Only essential metadata (`kernel_name`, `memory_object`) +- **NORMAL** (default): BASIC + device info + memory operation details +- **VERBOSE**: NORMAL + debug symbols + kernel arguments (equivalent to `sycl.debug`) + +Example usage: +```cpp +static xpti::subscriber_handle_t g_subscriber_handle; + +XPTI_CALLBACK_API void xptiSubscriberInit(xpti::subscriber_handle_t self) { + g_subscriber_handle = self; +} + +XPTI_CALLBACK_API void xptiTraceInit(unsigned int, unsigned int, + const char*, const char *stream_name) { + if (std::string("sycl") == stream_name) { + xpti::stream_id_t stream_id = xptiRegisterStream(stream_name); + // Request BASIC level for minimal overhead + xptiSetSubscriberStreamDetailLevel( + g_subscriber_handle, stream_id, + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_BASIC); + // Register callbacks as usual... + } +} +``` + +The `"sycl.debug"` stream remains fully supported for backward compatibility and automatically provides VERBOSE-level metadata. | Trace Point Type | Parameter Description | Metadata | | :----------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | **`graph_create`** |
  • **trace_type**: `xpti::trace_point_type_t::graph_create` that marks the creation of an asynchronous graph.
  • **parent**: `nullptr`
  • **event**: The global asynchronous graph object ID. All other graph related events such as node and edge creation will always this ID as the parent ID.
  • **instance**: Unique ID related to the event, but not a correlation ID as there are other events to correlate to.
  • **user_data**: `nullptr`
  • SYCL runtime will always have one instance of a graph object with many disjoint subgraphs that get created during the execution of an application.
    | None | @@ -262,28 +293,30 @@ If a tool subscribes to both `"sycl"` and `"sycl.debug"`, only notifications fro ### Metadata description -| Metadata | Type | Description | -| :--------------------: | :-------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `access_mode` | `int` | Value of `sycl::access::mode` enum | -| `access_range_start` | `size_t` | Start of accessor range | -| `access_range_end` | `size_t` | End of accessor range | -| `allocation_type` | C-style string | Allocation type | -| `copy_from` | `size_t` | ID of source device | -| `copy_to` | `size_t` | ID of target device | -| `event` | `size_t` | Unique identifier of event | -| `from_source` | `bool` | `true` if kernel comes from user source | -| `kernel_name` | C-style string | Kernel name | -| `memory_object` | `size_t` | Unique identifier of memory object | -| `offset` | `size_t` | Accessor offset size in bytes | -| `sycl_device` | `size_t` | Unique identifier of SYCL device | -| `sycl_device_type` | C-style string | `CPU`, `GPU`, `ACC`, or `HOST` | -| `sycl_device_name` | C-style string | Result of `sycl::device::get_info()` | -| `sym_function_name` | C-style string | Function name | -| `sym_source_file_name` | C-style string | Source file name | -| `sym_line_no` | `int32_t` | File line number | -| `sym_column_no` | `int32_t` | File column number | -| `enqueue_kernel_data` | `xpti::offload_kernel_arg_data_t` | Includes kernel execution parameters (global size, local size, offset) and number of kernel arguments | -| `argN` | `xpti::offload_kernel_arg_data_t` | Description for the Nth kernel argument. It includes argument kind (sycl::detail::kernel_param_kind_t), pointer to the value, size and index in the argument list. | +| Metadata | Type | Detail Level | Description | +| :--------------------: | :-------------------------------: | :----------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `kernel_name` | C-style string | BASIC | Kernel name | +| `memory_object` | `size_t` | BASIC | Unique identifier of memory object | +| `sycl_device` | `size_t` | NORMAL | Unique identifier of SYCL device | +| `sycl_device_type` | C-style string | NORMAL | `CPU`, `GPU`, `ACC`, or `HOST` | +| `sycl_device_name` | C-style string | NORMAL | Result of `sycl::device::get_info()` | +| `offset` | `size_t` | NORMAL | Accessor offset size in bytes | +| `access_range_start` | `size_t` | NORMAL | Start of accessor range | +| `access_range_end` | `size_t` | NORMAL | End of accessor range | +| `allocation_type` | C-style string | NORMAL | Allocation type | +| `copy_from` | `size_t` | NORMAL | ID of source device | +| `copy_to` | `size_t` | NORMAL | ID of target device | +| `access_mode` | `int` | NORMAL | Value of `sycl::access::mode` enum | +| `event` | `size_t` | NORMAL | Unique identifier of event | +| `from_source` | `bool` | VERBOSE | `true` if kernel comes from user source | +| `sym_function_name` | C-style string | VERBOSE | Function name | +| `sym_source_file_name` | C-style string | VERBOSE | Source file name | +| `sym_line_no` | `int32_t` | VERBOSE | File line number | +| `sym_column_no` | `int32_t` | VERBOSE | File column number | +| `enqueue_kernel_data` | `xpti::offload_kernel_arg_data_t` | VERBOSE | Includes kernel execution parameters (global size, local size, offset) and number of kernel arguments | +| `argN` | `xpti::offload_kernel_arg_data_t` | VERBOSE | Description for the Nth kernel argument. It includes argument kind (sycl::detail::kernel_param_kind_t), pointer to the value, size and index in the argument list. | + +**Note**: Detail Level applies when using stream detail level control on the `"sycl"` stream. The `"sycl.debug"` stream always provides VERBOSE-level metadata. ## Buffer management stream `"sycl.experimental.buffer"` Notification Signatures diff --git a/sycl/source/detail/queue_impl.cpp b/sycl/source/detail/queue_impl.cpp index b669a0fa63400..2bd688313187e 100644 --- a/sycl/source/detail/queue_impl.cpp +++ b/sycl/source/detail/queue_impl.cpp @@ -897,12 +897,19 @@ void *queue_impl::instrumentationProlog(const detail::code_location &CodeLoc, IId = xptiGetUniqueId(); auto WaitEvent = Event->event_ref(); - // We will allow the device type to be set - xpti::addMetadata(WaitEvent, "sycl_device_type", queueDeviceToString(this)); - // We limit the amount of metadata that is added to the regular stream. - // Only "sycl.debug" stream will have the full information. This improves the - // performance when this data is not required by the tool or the collector. - if (isDebugStream(StreamID)) { + + // Get the effective detail level for this stream + auto Level = xptiGetEffectiveStreamDetailLevel(StreamID); + + // Device type is added at NORMAL level and above + if (Level >= xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::addMetadata(WaitEvent, "sycl_device_type", queueDeviceToString(this)); + } + + // Debug metadata (sym_*) is added at VERBOSE level or if subscribing to + // sycl.debug stream (for backward compatibility) + if (Level >= xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_VERBOSE || + isDebugStream(StreamID)) { if (HasSourceInfo) { xpti::addMetadata(WaitEvent, "sym_function_name", CodeLoc.functionName()); xpti::addMetadata(WaitEvent, "sym_source_file_name", CodeLoc.fileName()); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 322c2a96bc191..57cccadb8a24c 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -91,7 +91,13 @@ static size_t deviceToID(const device &Device) { return reinterpret_cast(getSyclObjImpl(Device)->getHandleRef()); } -static void addDeviceMetadata(xpti_td *TraceEvent, queue_impl *Queue) { +static void addDeviceMetadata(xpti_td *TraceEvent, queue_impl *Queue, + xpti::stream_id_t StreamID) { + // Device metadata is only added at NORMAL level and above + auto Level = xptiGetEffectiveStreamDetailLevel(StreamID); + if (Level < xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) + return; + xpti::addMetadata(TraceEvent, "sycl_device_type", queueDeviceToString(Queue)); if (Queue) { xpti::addMetadata(TraceEvent, "sycl_device", @@ -102,8 +108,9 @@ static void addDeviceMetadata(xpti_td *TraceEvent, queue_impl *Queue) { } } static void addDeviceMetadata(xpti_td *TraceEvent, - const std::shared_ptr &Queue) { - addDeviceMetadata(TraceEvent, Queue.get()); + const std::shared_ptr &Queue, + xpti::stream_id_t StreamID) { + addDeviceMetadata(TraceEvent, Queue.get(), StreamID); } static unsigned long long getQueueID(queue_impl *Queue) { @@ -539,11 +546,16 @@ void Command::emitEdgeEventForCommandDependence( EdgeEvent->source_id = SrcEvent->unique_id; EdgeEvent->target_id = TgtEvent->unique_id; // We allow this metadata to be set as it describes the edge. + auto Level = xptiGetEffectiveStreamDetailLevel(MStreamID); if (IsCommand) { - xpti::addMetadata(EdgeEvent, "access_mode", - static_cast(AccMode.value())); + // memory_object is always added at BASIC level and above xpti::addMetadata(EdgeEvent, "memory_object", reinterpret_cast(ObjAddr)); + // access_mode is added at NORMAL level and above + if (Level >= xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::addMetadata(EdgeEvent, "access_mode", + static_cast(AccMode.value())); + } } else { xpti::addMetadata(EdgeEvent, "event", reinterpret_cast(ObjAddr)); } @@ -615,10 +627,12 @@ void Command::emitEdgeEventForEventDependence(Command *Cmd, xpti_td *TgtEvent = static_cast(MTraceEvent); EdgeEvent->source_id = NodeEvent->unique_id; EdgeEvent->target_id = TgtEvent->unique_id; - // We allow this metadata to be set as an edge without the event address - // will be less useful. - xpti::addMetadata(EdgeEvent, "event", - reinterpret_cast(UrEventAddr)); + // event metadata is added at NORMAL level and above + auto Level = xptiGetEffectiveStreamDetailLevel(MStreamID); + if (Level >= xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::addMetadata(EdgeEvent, "event", + reinterpret_cast(UrEventAddr)); + } xptiNotifySubscribers(MStreamID, xpti::trace_edge_create, detail::GSYCLGraphEvent, EdgeEvent, EdgeInstanceNo, nullptr); @@ -974,8 +988,8 @@ void AllocaCommandBase::emitInstrumentationData() { // internal infrastructure to guarantee collision free universal IDs. if (MTraceEvent) { xpti_td *TE = static_cast(MTraceEvent); - addDeviceMetadata(TE, MQueue); - // Memory-object is used frequently, so it is always added. + addDeviceMetadata(TE, MQueue, MStreamID); + // Memory-object is used frequently, so it is always added at BASIC level and above. xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant @@ -1093,10 +1107,14 @@ void AllocaSubBufCommand::emitInstrumentationData() { return; xpti_td *TE = static_cast(MTraceEvent); - xpti::addMetadata(TE, "offset", this->MRequirement.MOffsetInBytes); - xpti::addMetadata(TE, "access_range_start", - this->MRequirement.MAccessRange[0]); - xpti::addMetadata(TE, "access_range_end", this->MRequirement.MAccessRange[1]); + // Offset and access range metadata are added at NORMAL level and above + auto Level = xptiGetEffectiveStreamDetailLevel(MStreamID); + if (Level >= xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::addMetadata(TE, "offset", this->MRequirement.MOffsetInBytes); + xpti::addMetadata(TE, "access_range_start", + this->MRequirement.MAccessRange[0]); + xpti::addMetadata(TE, "access_range_end", this->MRequirement.MAccessRange[1]); + } xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); makeTraceEventEpilog(); #endif @@ -1170,9 +1188,13 @@ void ReleaseCommand::emitInstrumentationData() { makeTraceEventProlog(MAddress); xpti_td *TE = static_cast(MTraceEvent); - addDeviceMetadata(TE, MQueue); - xpti::addMetadata(TE, "allocation_type", - commandToName(MAllocaCmd->getType())); + addDeviceMetadata(TE, MQueue, MStreamID); + // allocation_type is added at NORMAL level and above + auto Level = xptiGetEffectiveStreamDetailLevel(MStreamID); + if (Level >= xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::addMetadata(TE, "allocation_type", + commandToName(MAllocaCmd->getType())); + } // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); @@ -1297,7 +1319,8 @@ void MapMemObject::emitInstrumentationData() { makeTraceEventProlog(MAddress); xpti_td *TE = static_cast(MTraceEvent); - addDeviceMetadata(TE, MQueue); + addDeviceMetadata(TE, MQueue, MStreamID); + // memory_object is always added at BASIC level and above xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant @@ -1360,7 +1383,8 @@ void UnMapMemObject::emitInstrumentationData() { makeTraceEventProlog(MAddress); xpti_td *TE = static_cast(MTraceEvent); - addDeviceMetadata(TE, MQueue); + addDeviceMetadata(TE, MQueue, MStreamID); + // memory_object is always added at BASIC level and above xpti::addMetadata(TE, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant @@ -1455,13 +1479,18 @@ void MemCpyCommand::emitInstrumentationData() { makeTraceEventProlog(MAddress); xpti_td *CmdTraceEvent = static_cast(MTraceEvent); - addDeviceMetadata(CmdTraceEvent, MQueue); + addDeviceMetadata(CmdTraceEvent, MQueue, MStreamID); + // memory_object is always added at BASIC level and above xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); - xpti::addMetadata(CmdTraceEvent, "copy_from", - MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0); - xpti::addMetadata(CmdTraceEvent, "copy_to", - MQueue ? deviceToID(MQueue->get_device()) : 0); + // copy_from and copy_to are added at NORMAL level and above + auto Level = xptiGetEffectiveStreamDetailLevel(MStreamID); + if (Level >= xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::addMetadata(CmdTraceEvent, "copy_from", + MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0); + xpti::addMetadata(CmdTraceEvent, "copy_to", + MQueue ? deviceToID(MQueue->get_device()) : 0); + } // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); @@ -1628,13 +1657,18 @@ void MemCpyCommandHost::emitInstrumentationData() { makeTraceEventProlog(MAddress); xpti_td *CmdTraceEvent = static_cast(MTraceEvent); - addDeviceMetadata(CmdTraceEvent, MQueue); + addDeviceMetadata(CmdTraceEvent, MQueue, MStreamID); + // memory_object is always added at BASIC level and above xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); - xpti::addMetadata(CmdTraceEvent, "copy_from", - MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0); - xpti::addMetadata(CmdTraceEvent, "copy_to", - MQueue ? deviceToID(MQueue->get_device()) : 0); + // copy_from and copy_to are added at NORMAL level and above + auto Level = xptiGetEffectiveStreamDetailLevel(MStreamID); + if (Level >= xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL) { + xpti::addMetadata(CmdTraceEvent, "copy_from", + MSrcQueue ? deviceToID(MSrcQueue->get_device()) : 0); + xpti::addMetadata(CmdTraceEvent, "copy_to", + MQueue ? deviceToID(MQueue->get_device()) : 0); + } // Since we do NOT add queue_id value to metadata, we are stashing it to TLS // as this data is mutable and the metadata is supposed to be invariant xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(MQueue)); @@ -1724,7 +1758,8 @@ void EmptyCommand::emitInstrumentationData() { makeTraceEventProlog(MAddress); xpti_td *CmdTraceEvent = static_cast(MTraceEvent); - addDeviceMetadata(CmdTraceEvent, MQueue); + addDeviceMetadata(CmdTraceEvent, MQueue, MStreamID); + // memory_object is always added at BASIC level and above xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS @@ -1789,7 +1824,8 @@ void UpdateHostRequirementCommand::emitInstrumentationData() { makeTraceEventProlog(MAddress); xpti_td *CmdTraceEvent = static_cast(MTraceEvent); - addDeviceMetadata(CmdTraceEvent, MQueue); + addDeviceMetadata(CmdTraceEvent, MQueue, MStreamID); + // memory_object is always added at BASIC level and above xpti::addMetadata(CmdTraceEvent, "memory_object", reinterpret_cast(MAddress)); // Since we do NOT add queue_id value to metadata, we are stashing it to TLS @@ -1985,13 +2021,21 @@ void instrumentationFillCommonData( OutInstanceID = CGKernelInstanceNo; OutTraceEvent = CmdTraceEvent; - addDeviceMetadata(CmdTraceEvent, Queue); + // Get the effective detail level for this stream + auto Level = xptiGetEffectiveStreamDetailLevel(StreamID); + + // Device metadata is added at NORMAL level and above + addDeviceMetadata(CmdTraceEvent, Queue, StreamID); + + // kernel_name is always added at BASIC level and above if (!KernelName.empty()) { xpti::addMetadata(CmdTraceEvent, "kernel_name", KernelName); } - // We limit the metadata to only include the kernel name and device - // information by default. - if (detail::isDebugStream(StreamID)) { + + // Debug metadata (from_source, sym_*) is added at VERBOSE level or + // if subscribing to sycl.debug stream (for backward compatibility) + if (Level >= xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_VERBOSE || + detail::isDebugStream(StreamID)) { if (FromSource.has_value()) { xpti::addMetadata(CmdTraceEvent, "from_source", FromSource.value()); } @@ -2052,9 +2096,12 @@ std::pair emitKernelInstrumentationData( if (Queue) xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(Queue)); - // Add the additional metadata only if the debug information is subscribed - // to; in this case, it is the kernel and its parameters. - if (detail::isDebugStream(StreamID)) { + // Add the additional metadata only if VERBOSE level is enabled or + // subscribing to sycl.debug stream (for backward compatibility); + // in this case, it is the kernel and its parameters. + auto Level = xptiGetEffectiveStreamDetailLevel(StreamID); + if (Level >= xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_VERBOSE || + detail::isDebugStream(StreamID)) { instrumentationAddExtraKernelMetadata( CmdTraceEvent, NDRDesc, KernelBundleImplPtr, DeviceKernelInfo, SyclKernel, Queue, CGArgs); @@ -2108,7 +2155,9 @@ void ExecCGCommand::emitInstrumentationData() { getQueueID(MQueue)); MTraceEvent = static_cast(CmdTraceEvent); if (MCommandGroup->getType() == detail::CGType::Kernel) { - if (detail::isDebugStream(MStreamID)) { + auto Level = xptiGetEffectiveStreamDetailLevel(MStreamID); + if (Level >= xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_VERBOSE || + detail::isDebugStream(MStreamID)) { auto KernelCG = reinterpret_cast(MCommandGroup.get()); instrumentationAddExtraKernelMetadata( From ac9c6bc8cc52d721376a6ee4e0ed888dc4f3ddf4 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Fri, 3 Apr 2026 01:04:18 +0200 Subject: [PATCH 5/5] Fixes --- xptifw/src/xpti_trace_framework.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/xptifw/src/xpti_trace_framework.cpp b/xptifw/src/xpti_trace_framework.cpp index 89682673d34eb..006c4a1b1bf0f 100644 --- a/xptifw/src/xpti_trace_framework.cpp +++ b/xptifw/src/xpti_trace_framework.cpp @@ -544,6 +544,10 @@ class Subscribers { // with the new stream information. if (MHandleLUT.size()) { for (auto &Handle : MHandleLUT) { + // Set default stream detail level for this subscriber + xptiSetSubscriberStreamDetailLevel(Handle.second.subscriber_id, + xptiRegisterStream(Stream), + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL); Handle.second.init(major_revision, minor_revision, version_string, Stream); } @@ -1947,7 +1951,7 @@ class Notifications { // Recalculate the effective level for this stream (max across all subscribers) uint8_t max_level = static_cast( - xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NORMAL); + xpti::stream_detail_level_t::XPTI_STREAM_DETAIL_LEVEL_NONE); for (const auto &subscriber_entry : MStreamDetailLevels) { const auto &stream_levels = subscriber_entry.second; @@ -1962,7 +1966,6 @@ class Notifications { // Update the cached effective level atomically MEffectiveStreamDetailLevels[stream].store(max_level, std::memory_order_release); - return xpti::result_t::XPTI_RESULT_SUCCESS; }