From 58c22dd70df949c9865d6e41d73f7f927860182f Mon Sep 17 00:00:00 2001 From: Eric Eaton Date: Tue, 30 Sep 2025 15:32:47 -0700 Subject: [PATCH 1/5] Replace GpuEvent with ZoneEvent This replaces the GpuEvent struct with ZoneEvent and a GpuExtra data type. This allows some common code to be merged, but since this is a minimal implementation of the idea, only a few small things are merged by this patch. --- csvexport/src/csvexport.cpp | 6 +- .../src/profiler/TracyTimelineItemGpu.cpp | 18 +-- profiler/src/profiler/TracyView.hpp | 32 ++--- .../src/profiler/TracyView_GpuTimeline.cpp | 58 ++++---- .../src/profiler/TracyView_Navigation.cpp | 17 +-- profiler/src/profiler/TracyView_Options.cpp | 12 +- .../src/profiler/TracyView_Statistics.cpp | 8 +- profiler/src/profiler/TracyView_Timeline.cpp | 5 +- profiler/src/profiler/TracyView_Utility.cpp | 103 ++++---------- profiler/src/profiler/TracyView_ZoneInfo.cpp | 77 +++++----- server/TracyEvent.hpp | 77 ++++++---- server/TracyWorker.cpp | 133 ++++++++---------- server/TracyWorker.hpp | 69 ++++----- 13 files changed, 275 insertions(+), 340 deletions(-) diff --git a/csvexport/src/csvexport.cpp b/csvexport/src/csvexport.cpp index dae114053c..f918c79270 100644 --- a/csvexport/src/csvexport.cpp +++ b/csvexport/src/csvexport.cpp @@ -299,9 +299,9 @@ int main(int argc, char** argv) const auto& zone_data = it->second; for (const auto& zone_thread_data : zone_data.zones) { - tracy::GpuEvent* gpu_event = zone_thread_data.Zone(); - const auto start = gpu_event->GpuStart(); - const auto end = gpu_event->GpuEnd(); + auto& gpu_event = worker.GetGpuExtra( *zone_thread_data.Zone() ); + const auto start = gpu_event.GpuStart(); + const auto end = gpu_event.GpuEnd(); values[2] = std::to_string( start ); diff --git a/profiler/src/profiler/TracyTimelineItemGpu.cpp b/profiler/src/profiler/TracyTimelineItemGpu.cpp index b499c64f17..01cd0bc065 100644 --- a/profiler/src/profiler/TracyTimelineItemGpu.cpp +++ 
b/profiler/src/profiler/TracyTimelineItemGpu.cpp @@ -70,12 +70,12 @@ void TimelineItemGpu::HeaderTooltip( const char* label ) const { if( it->second.timeline.is_magic() ) { - auto& tl = *(Vector*)&it->second.timeline; - tid = m_worker.DecompressThread( tl.begin()->Thread() ); + auto& tl = *(Vector*)&it->second.timeline; + tid = m_worker.DecompressThread( m_worker.GetGpuExtra( *tl.begin() ).Thread() ); } else { - tid = m_worker.DecompressThread( (*it->second.timeline.begin())->Thread() ); + tid = m_worker.DecompressThread( m_worker.GetGpuExtra( **it->second.timeline.begin() ).Thread() ); } } } @@ -147,11 +147,11 @@ int64_t TimelineItemGpu::RangeBegin() const int64_t t0; if( td.second.timeline.is_magic() ) { - t0 = ((Vector*)&td.second.timeline)->front().GpuStart(); + t0 = ((Vector*)&td.second.timeline)->front().Start(); } else { - t0 = td.second.timeline.front()->GpuStart(); + t0 = td.second.timeline.front()->Start(); } if( t0 >= 0 ) { @@ -169,21 +169,21 @@ int64_t TimelineItemGpu::RangeEnd() const int64_t t0; if( td.second.timeline.is_magic() ) { - t0 = ((Vector*)&td.second.timeline)->front().GpuStart(); + t0 = ((Vector*)&td.second.timeline)->front().Start(); } else { - t0 = td.second.timeline.front()->GpuStart(); + t0 = td.second.timeline.front()->Start(); } if( t0 >= 0 ) { if( td.second.timeline.is_magic() ) { - t = std::max( t, std::min( m_worker.GetLastTime(), m_worker.GetZoneEnd( ((Vector*)&td.second.timeline)->back() ) ) ); + t = std::max( t, std::min( m_worker.GetLastTime(), m_worker.GetZoneEndGPU( ((Vector*)&td.second.timeline)->back() ) ) ); } else { - t = std::max( t, std::min( m_worker.GetLastTime(), m_worker.GetZoneEnd( *td.second.timeline.back() ) ) ); + t = std::max( t, std::min( m_worker.GetLastTime(), m_worker.GetZoneEndGPU( *td.second.timeline.back() ) ) ); } } } diff --git a/profiler/src/profiler/TracyView.hpp b/profiler/src/profiler/TracyView.hpp index 23f308eded..f8a2b37c4e 100644 --- a/profiler/src/profiler/TracyView.hpp +++ 
b/profiler/src/profiler/TracyView.hpp @@ -255,7 +255,7 @@ class View void DrawZoneList( const TimelineContext& ctx, const std::vector& drawList, int offset, uint64_t tid, int maxDepth, double margin ); void DrawThreadCropper( const int depth, const uint64_t tid, const float xPos, const float yPos, const float ostep, const float cropperWidth, const bool hasCtxSwitches ); void DrawContextSwitchList( const TimelineContext& ctx, const std::vector& drawList, const Vector& ctxSwitch, int offset, int endOffset, bool isFiber ); - int DispatchGpuZoneLevel( const Vector>& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int offset, int depth, uint64_t thread, float yMin, float yMax, int64_t begin, int drift ); + int DispatchGpuZoneLevel( const Vector>& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int offset, int depth, uint64_t thread, float yMin, float yMax, int64_t begin, int drift ); template int DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int offset, int depth, uint64_t thread, float yMin, float yMax, int64_t begin, int drift ); template @@ -333,21 +333,21 @@ class View uint32_t GetSrcLocColor( const SourceLocation& srcloc, int depth ); uint32_t GetRawSrcLocColor( const SourceLocation& srcloc, int depth ); uint32_t GetZoneColor( const ZoneEvent& ev, uint64_t thread, int depth ); - uint32_t GetZoneColor( const GpuEvent& ev ); + uint32_t GetZoneColor( const ZoneEvent& ev ); ZoneColorData GetZoneColorData( const ZoneEvent& ev, uint64_t thread, int depth, uint32_t inheritedColor ); - ZoneColorData GetZoneColorData( const GpuEvent& ev ); + ZoneColorData GetZoneColorData( const ZoneEvent& ev ); void ZoomToZone( const ZoneEvent& ev ); - void ZoomToZone( const GpuEvent& ev ); + void ZoomToZoneGPU( const ZoneEvent& ev ); void ZoomToPrevFrame(); void ZoomToNextFrame(); void CenterAtTime( int64_t t ); void ShowZoneInfo( const ZoneEvent& ev ); - void ShowZoneInfo( const GpuEvent& ev, uint64_t thread ); 
+ void ShowZoneInfo( const ZoneEvent& ev, uint64_t thread ); void ZoneTooltip( const ZoneEvent& ev ); - void ZoneTooltip( const GpuEvent& ev ); + void ZoneTooltipGPU( const ZoneEvent& ev ); void CallstackTooltip( uint32_t idx ); void CallstackTooltipContents( uint32_t idx ); void CrashTooltip(); @@ -357,11 +357,11 @@ class View const ZoneEvent* GetZoneChild( const ZoneEvent& zone, int64_t time ) const; bool IsZoneReentry( const ZoneEvent& zone ) const; bool IsZoneReentry( const ZoneEvent& zone, uint64_t tid ) const; - const GpuEvent* GetZoneParent( const GpuEvent& zone ) const; + const ZoneEvent* GetZoneParentGPU( const ZoneEvent& zone ) const; const ThreadData* GetZoneThreadData( const ZoneEvent& zone ) const; uint64_t GetZoneThread( const ZoneEvent& zone ) const; - uint64_t GetZoneThread( const GpuEvent& zone ) const; - const GpuCtxData* GetZoneCtx( const GpuEvent& zone ) const; + uint64_t GetZoneThreadGPU( const EventAdapter& zone ) const; + const GpuCtxData* GetZoneCtx( const ZoneEvent& zone ) const; bool FindMatchingZone( int prev0, int prev1, int flags ); const ZoneEvent* FindZoneAtTime( uint64_t thread, int64_t time ) const; uint64_t GetFrameNumber( const FrameData& fd, int i ) const; @@ -379,12 +379,10 @@ class View void SmallCallstackButton( const char* name, uint32_t callstack, int& idx, bool tooltip = true ); void DrawCallstackCalls( uint32_t callstack, uint16_t limit ) const; void SetViewToLastFrames(); - int64_t GetZoneChildTime( const ZoneEvent& zone ); - int64_t GetZoneChildTime( const GpuEvent& zone ); + int64_t GetZoneChildTime( const ZoneEvent& zone, bool gpu ); int64_t GetZoneChildTimeFast( const ZoneEvent& zone ); int64_t GetZoneChildTimeFastClamped( const ZoneEvent& zone, int64_t t0, int64_t t1 ); - int64_t GetZoneSelfTime( const ZoneEvent& zone ); - int64_t GetZoneSelfTime( const GpuEvent& zone ); + int64_t GetZoneSelfTime( const ZoneEvent& zone, bool gpu = false ); bool GetZoneRunningTime( const ContextSwitch* ctx, const ZoneEvent& ev, 
int64_t& time, uint64_t& cnt ); bool GetZoneRunningTime( const ContextSwitch* ctx, const ZoneEvent& ev, const RangeSlim& range, int64_t& time, uint64_t& cnt ); const char* GetThreadContextData( uint64_t thread, bool& local, bool& untracked, const char*& program ); @@ -479,8 +477,8 @@ class View DecayValue m_msgHighlight = nullptr; DecayValue m_lockHoverHighlight = InvalidId; DecayValue m_msgToFocus = nullptr; - const GpuEvent* m_gpuInfoWindow = nullptr; - const GpuEvent* m_gpuHighlight; + const ZoneEvent* m_gpuInfoWindow = nullptr; + const ZoneEvent* m_gpuHighlight; uint64_t m_gpuInfoWindowThread; uint32_t m_callstackInfoWindow = 0; int64_t m_memoryAllocInfoWindow = -1; @@ -575,7 +573,7 @@ class View BuzzAnim m_statBuzzAnim; Vector m_zoneInfoStack; - Vector m_gpuInfoStack; + Vector m_gpuInfoStack; SourceContents m_srcHintCache; std::unique_ptr m_sourceView; @@ -877,8 +875,6 @@ class View struct { std::pair zoneSelfTime = { nullptr, 0 }; std::pair zoneSelfTime2 = { nullptr, 0 }; - std::pair gpuSelfTime = { nullptr, 0 }; - std::pair gpuSelfTime2 = { nullptr, 0 }; } m_cache; struct { diff --git a/profiler/src/profiler/TracyView_GpuTimeline.cpp b/profiler/src/profiler/TracyView_GpuTimeline.cpp index 4b692d1c2c..08ba7af269 100644 --- a/profiler/src/profiler/TracyView_GpuTimeline.cpp +++ b/profiler/src/profiler/TracyView_GpuTimeline.cpp @@ -40,10 +40,10 @@ bool View::DrawGpu( const TimelineContext& ctx, const GpuCtxData& gpu, int& offs assert( !tl.empty() ); if( tl.is_magic() ) { - auto& tlm = *(Vector*)&tl; - if( tlm.front().GpuStart() >= 0 ) + auto& tlm = *(Vector*)&tl; + if( tlm.front().Start() >= 0 ) { - const auto begin = tlm.front().GpuStart(); + const auto begin = tlm.front().Start(); const auto drift = GpuDrift( &gpu ); if( !singleThread ) offset += sstep; const auto partDepth = DispatchGpuZoneLevel( tl, hover, pxns, int64_t( nspx ), wpos, offset, 0, gpu.thread, yMin, yMax, begin, drift ); @@ -68,9 +68,9 @@ bool View::DrawGpu( const TimelineContext& ctx, const 
GpuCtxData& gpu, int& offs } else { - if( tl.front()->GpuStart() >= 0 ) + if( tl.front()->Start() >= 0 ) { - const auto begin = tl.front()->GpuStart(); + const auto begin = tl.front()->Start(); const auto drift = GpuDrift( &gpu ); if( !singleThread ) offset += sstep; const auto partDepth = DispatchGpuZoneLevel( tl, hover, pxns, int64_t( nspx ), wpos, offset, 0, gpu.thread, yMin, yMax, begin, drift ); @@ -97,7 +97,7 @@ bool View::DrawGpu( const TimelineContext& ctx, const GpuCtxData& gpu, int& offs return depth != 0; } -int View::DispatchGpuZoneLevel( const Vector>& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int _offset, int depth, uint64_t thread, float yMin, float yMax, int64_t begin, int drift ) +int View::DispatchGpuZoneLevel( const Vector>& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int _offset, int depth, uint64_t thread, float yMin, float yMax, int64_t begin, int drift ) { const auto ty = ImGui::GetTextLineHeight(); const auto ostep = ty + 1; @@ -108,22 +108,22 @@ int View::DispatchGpuZoneLevel( const Vector>& vec, bool hov { if( vec.is_magic() ) { - return DrawGpuZoneLevel>( *(Vector*)&vec, hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); + return DrawGpuZoneLevel>( *(Vector*)&vec, hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); } else { - return DrawGpuZoneLevel>( vec, hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); + return DrawGpuZoneLevel>( vec, hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); } } else { if( vec.is_magic() ) { - return SkipGpuZoneLevel>( *(Vector*)&vec, hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); + return SkipGpuZoneLevel>( *(Vector*)&vec, hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); } else { - return SkipGpuZoneLevel>( vec, hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); + return 
SkipGpuZoneLevel>( vec, hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); } } } @@ -132,14 +132,14 @@ template int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int _offset, int depth, uint64_t thread, float yMin, float yMax, int64_t begin, int drift ) { // cast to uint64_t, so that unended zones (end = -1) are still drawn - auto it = std::lower_bound( vec.begin(), vec.end(), std::max( 0, m_vd.zvStart ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).GpuEnd(), begin, drift ) < (uint64_t)r; } ); + auto it = std::lower_bound( vec.begin(), vec.end(), std::max( 0, m_vd.zvStart ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).End(), begin, drift ) < (uint64_t)r; } ); if( it == vec.end() ) return depth; Adapter a; - const auto zitend = std::lower_bound( it, vec.end(), std::max( 0, m_vd.zvEnd ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).GpuStart(), begin, drift ) < (uint64_t)r; } ); + const auto zitend = std::lower_bound( it, vec.end(), std::max( 0, m_vd.zvEnd ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).Start(), begin, drift ) < (uint64_t)r; } ); if( it == zitend ) return depth; - if( AdjustGpuTime( a(*(zitend-1)).GpuEnd(), begin, drift ) < m_vd.zvStart ) return depth; + if( AdjustGpuTime( a(*(zitend-1)).End(), begin, drift ) < m_vd.zvStart ) return depth; const auto w = ImGui::GetContentRegionAvail().x - 1; const auto ty = ImGui::GetTextLineHeight(); @@ -153,8 +153,8 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, while( it < zitend ) { - auto& ev = a(*it); - auto end = m_worker.GetZoneEnd( ev ); + auto& ev = m_worker.GetGpuExtra(a(*it)); + auto end = m_worker.GetZoneEndGPU( ev ); if( end == std::numeric_limits::max() ) break; const auto start = 
AdjustGpuTime( ev.GpuStart(), begin, drift ); end = AdjustGpuTime( end, begin, drift ); @@ -171,11 +171,11 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, for(;;) { const auto prevIt = it; - it = std::lower_bound( it, zitend, std::max( 0, nextTime ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).GpuEnd(), begin, drift ) < (uint64_t)r; } ); + it = std::lower_bound( it, zitend, std::max( 0, nextTime ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).End(), begin, drift ) < (uint64_t)r; } ); if( it == prevIt ) ++it; num += std::distance( prevIt, it ); if( it == zitend ) break; - const auto nend = AdjustGpuTime( m_worker.GetZoneEnd( a(*it) ), begin, drift ); + const auto nend = AdjustGpuTime( m_worker.GetZoneEndGPU( a(*it) ), begin, drift ); const auto nsnext = nend - m_vd.zvStart; if( nsnext < 0 || nsnext - px1ns >= MinVisNs * 2 ) break; px1ns = nsnext; @@ -203,11 +203,11 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, else { const auto zoneThread = thread != 0 ? thread : m_worker.DecompressThread( ev.Thread() ); - ZoneTooltip( ev ); + ZoneTooltipGPU( ev ); if( IsMouseClicked( 2 ) && rend - start > 0 ) { - ZoomToZone( ev ); + ZoomToZoneGPU( ev ); } if( IsMouseClicked( 0 ) ) { @@ -288,12 +288,12 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, if( hover && ImGui::IsMouseHoveringRect( wpos + ImVec2( px0, offset ), wpos + ImVec2( px1, offset + tsz.y + 1 ) ) ) { - const auto zoneThread = thread != 0 ? thread : m_worker.DecompressThread( ev.Thread() ); - ZoneTooltip( ev ); + const auto zoneThread = thread != 0 ? 
thread : m_worker.DecompressThread( ev.thread ); + ZoneTooltipGPU( ev ); if( !m_zoomAnim.active && IsMouseClicked( 2 ) ) { - ZoomToZone( ev ); + ZoomToZoneGPU( ev ); } if( IsMouseClicked( 0 ) ) { @@ -315,14 +315,14 @@ template int View::SkipGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int _offset, int depth, uint64_t thread, float yMin, float yMax, int64_t begin, int drift ) { // cast to uint64_t, so that unended zones (end = -1) are still drawn - auto it = std::lower_bound( vec.begin(), vec.end(), std::max( 0, m_vd.zvStart ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).GpuEnd(), begin, drift ) < (uint64_t)r; } ); + auto it = std::lower_bound( vec.begin(), vec.end(), std::max( 0, m_vd.zvStart ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).End(), begin, drift ) < (uint64_t)r; } ); if( it == vec.end() ) return depth; Adapter a; - const auto zitend = std::lower_bound( it, vec.end(), std::max( 0, m_vd.zvEnd ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).GpuStart(), begin, drift ) < (uint64_t)r; } ); + const auto zitend = std::lower_bound( it, vec.end(), std::max( 0, m_vd.zvEnd ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).Start(), begin, drift ) < (uint64_t)r; } ); if( it == zitend ) return depth; - if( AdjustGpuTime( a(*(zitend-1)).GpuEnd(), begin, drift ) < m_vd.zvStart ) return depth; + if( AdjustGpuTime( a(*(zitend-1)).End(), begin, drift ) < m_vd.zvStart ) return depth; depth++; int maxdepth = depth; @@ -330,9 +330,9 @@ int View::SkipGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, while( it < zitend ) { auto& ev = a(*it); - auto end = m_worker.GetZoneEnd( ev ); + auto end = m_worker.GetZoneEndGPU( ev ); if( end == std::numeric_limits::max() ) break; - const auto start = AdjustGpuTime( 
ev.GpuStart(), begin, drift ); + const auto start = AdjustGpuTime( ev.Start(), begin, drift ); end = AdjustGpuTime( end, begin, drift ); const auto zsz = std::max( ( end - start ) * pxns, pxns * 0.5 ); if( zsz < MinVisSize ) @@ -343,10 +343,10 @@ int View::SkipGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, for(;;) { const auto prevIt = it; - it = std::lower_bound( it, zitend, nextTime, [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).GpuEnd(), begin, drift ) < (uint64_t)r; } ); + it = std::lower_bound( it, zitend, nextTime, [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).End(), begin, drift ) < (uint64_t)r; } ); if( it == prevIt ) ++it; if( it == zitend ) break; - const auto nend = AdjustGpuTime( m_worker.GetZoneEnd( a(*it) ), begin, drift ); + const auto nend = AdjustGpuTime( m_worker.GetZoneEndGPU( a(*it) ), begin, drift ); const auto nsnext = nend - m_vd.zvStart; if( nsnext - px1ns >= MinVisNs * 2 ) break; px1ns = nsnext; diff --git a/profiler/src/profiler/TracyView_Navigation.cpp b/profiler/src/profiler/TracyView_Navigation.cpp index 75f5eaea29..25fa85fb8f 100644 --- a/profiler/src/profiler/TracyView_Navigation.cpp +++ b/profiler/src/profiler/TracyView_Navigation.cpp @@ -10,30 +10,31 @@ void View::ZoomToZone( const ZoneEvent& ev ) ZoomToRange( ev.Start(), end ); } -void View::ZoomToZone( const GpuEvent& ev ) +void View::ZoomToZoneGPU( const ZoneEvent& ev ) { - const auto end = m_worker.GetZoneEnd( ev ); - if( end - ev.GpuStart() <= 0 ) return; + const auto end = m_worker.GetZoneEndGPU( ev ); + if( end - ev.Start() <= 0 ) return; auto ctx = GetZoneCtx( ev ); if( !ctx ) { - ZoomToRange( ev.GpuStart(), end ); + ZoomToRange( ev.Start(), end ); } else { - const auto td = ctx->threadData.size() == 1 ? 
ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.Thread() ) ); + auto thread = m_worker.DecompressThread( m_worker.GetGpuExtra(ev).thread ); + const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( thread ); assert( td != ctx->threadData.end() ); int64_t begin; if( td->second.timeline.is_magic() ) { - begin = ((Vector*)&td->second.timeline)->front().GpuStart(); + begin = ((Vector*)&td->second.timeline)->front().Start(); } else { - begin = td->second.timeline.front()->GpuStart(); + begin = td->second.timeline.front()->Start(); } const auto drift = GpuDrift( ctx ); - ZoomToRange( AdjustGpuTime( ev.GpuStart(), begin, drift ), AdjustGpuTime( end, begin, drift ) ); + ZoomToRange( AdjustGpuTime( ev.Start(), begin, drift ), AdjustGpuTime( end, begin, drift ) ); } } diff --git a/profiler/src/profiler/TracyView_Options.cpp b/profiler/src/profiler/TracyView_Options.cpp index 45ca4fac63..4f5c6294fd 100644 --- a/profiler/src/profiler/TracyView_Options.cpp +++ b/profiler/src/profiler/TracyView_Options.cpp @@ -161,10 +161,10 @@ void View::DrawOptions() size_t lastidx = 0; if( timeline.is_magic() ) { - auto& tl = *((Vector*)&timeline); + auto& tl = *( (Vector*)&timeline ); for( size_t j=tl.size()-1; j > 0; j-- ) { - if( tl[j].GpuEnd() >= 0 ) + if( tl[j].End() >= 0 ) { lastidx = j; break; @@ -175,7 +175,7 @@ void View::DrawOptions() { for( size_t j=timeline.size()-1; j > 0; j-- ) { - if( timeline[j]->GpuEnd() >= 0 ) + if( timeline[j]->End() >= 0 ) { lastidx = j; break; @@ -191,14 +191,14 @@ void View::DrawOptions() size_t idx = 0; if( timeline.is_magic() ) { - auto& tl = *((Vector*)&timeline); + auto& tl = *( (Vector*)&timeline ); do { const auto p0 = dist( gen ); const auto p1 = dist( gen ); if( p0 != p1 ) { - slopes[idx++] = float( 1.0 - double( tl[p1].GpuStart() - tl[p0].GpuStart() ) / double( tl[p1].CpuStart() - tl[p0].CpuStart() ) ); + slopes[idx++] = float( 1.0 - double( tl[p1].Start() - tl[p0].Start() ) / 
double( m_worker.GetGpuExtra( tl[p1] ).CpuStart() - m_worker.GetGpuExtra( tl[p0] ).CpuStart() ) ); } } while( idx < NumSlopes ); @@ -211,7 +211,7 @@ void View::DrawOptions() const auto p1 = dist( gen ); if( p0 != p1 ) { - slopes[idx++] = float( 1.0 - double( timeline[p1]->GpuStart() - timeline[p0]->GpuStart() ) / double( timeline[p1]->CpuStart() - timeline[p0]->CpuStart() ) ); + slopes[idx++] = float( 1.0 - double( timeline[p1]->Start() - timeline[p0]->Start() ) / double( m_worker.GetGpuExtra( *timeline[p1] ).CpuStart() - m_worker.GetGpuExtra( *timeline[p0] ).CpuStart() ) ); } } while( idx < NumSlopes ); diff --git a/profiler/src/profiler/TracyView_Statistics.cpp b/profiler/src/profiler/TracyView_Statistics.cpp index f8101897b2..35f3ab7c1e 100644 --- a/profiler/src/profiler/TracyView_Statistics.cpp +++ b/profiler/src/profiler/TracyView_Statistics.cpp @@ -364,8 +364,8 @@ void View::DrawStatistics() for( auto& v : it->second.zones ) { auto& z = *v.Zone(); - const auto start = z.GpuStart(); - const auto end = z.GpuEnd(); + const auto start = z.Start(); + const auto end = z.End(); if( start >= min && end <= max ) { const auto zt = end - start; @@ -403,8 +403,8 @@ void View::DrawStatistics() for( auto& v : it->second.zones ) { auto& z = *v.Zone(); - const auto start = z.GpuStart(); - const auto end = z.GpuEnd(); + const auto start = z.Start(); + const auto end = z.End(); if( start >= min && end <= max ) { const auto zt = end - start; diff --git a/profiler/src/profiler/TracyView_Timeline.cpp b/profiler/src/profiler/TracyView_Timeline.cpp index 803fcb69b7..26de28a3c1 100644 --- a/profiler/src/profiler/TracyView_Timeline.cpp +++ b/profiler/src/profiler/TracyView_Timeline.cpp @@ -473,8 +473,9 @@ void View::DrawTimeline() } if( m_gpuInfoWindow ) { - const auto px0 = ( m_gpuInfoWindow->CpuStart() - m_vd.zvStart ) * pxns; - const auto px1 = std::max( px0 + std::max( 1.0, pxns * 0.5 ), ( m_gpuInfoWindow->CpuEnd() - m_vd.zvStart ) * pxns ); + const auto ex = 
m_worker.GetGpuExtra( *m_gpuInfoWindow ); + const auto px0 = ( ex.CpuStart() - m_vd.zvStart ) * pxns; + const auto px1 = std::max( px0 + std::max( 1.0, pxns * 0.5 ), ( ex.CpuEnd() - m_vd.zvStart ) * pxns ); draw->AddRectFilled( ImVec2( wpos.x + px0, linepos.y ), ImVec2( wpos.x + px1, linepos.y + lineh ), 0x2288DD88 ); draw->AddRect( ImVec2( wpos.x + px0, linepos.y ), ImVec2( wpos.x + px1, linepos.y + lineh ), 0x4488DD88 ); } diff --git a/profiler/src/profiler/TracyView_Utility.cpp b/profiler/src/profiler/TracyView_Utility.cpp index cc08264a9d..7ced6ab75d 100644 --- a/profiler/src/profiler/TracyView_Utility.cpp +++ b/profiler/src/profiler/TracyView_Utility.cpp @@ -77,7 +77,7 @@ uint32_t View::GetZoneColor( const ZoneEvent& ev, uint64_t thread, int depth ) } } -uint32_t View::GetZoneColor( const GpuEvent& ev ) +uint32_t View::GetZoneColor( const ZoneEvent& ev ) { const auto& srcloc = m_worker.GetSourceLocation( ev.SrcLoc() ); const auto color = srcloc.color; @@ -136,7 +136,7 @@ View::ZoneColorData View::GetZoneColorData( const ZoneEvent& ev, uint64_t thread return ret; } -View::ZoneColorData View::GetZoneColorData( const GpuEvent& ev ) +View::ZoneColorData View::GetZoneColorData( const ZoneEvent& ev ) { ZoneColorData ret; const auto color = GetZoneColor( ev ); @@ -395,23 +395,23 @@ bool View::IsZoneReentry( const ZoneEvent& zone, uint64_t tid ) const return false; } -const GpuEvent* View::GetZoneParent( const GpuEvent& zone ) const +const ZoneEvent* View::GetZoneParentGPU( const ZoneEvent& zone ) const { for( const auto& ctx : m_worker.GetGpuData() ) { for( const auto& td : ctx->threadData ) { - const GpuEvent* parent = nullptr; - const Vector>* timeline = &td.second.timeline; + const ZoneEvent* parent = nullptr; + const Vector>* timeline = &td.second.timeline; if( timeline->empty() ) continue; for(;;) { if( timeline->is_magic() ) { - auto vec = (Vector*)timeline; - auto it = std::upper_bound( vec->begin(), vec->end(), zone.GpuStart(), [] ( const auto& l, const auto& 
r ) { return (uint64_t)l < (uint64_t)r.GpuStart(); } ); + auto vec = (Vector*)timeline; + auto it = std::upper_bound( vec->begin(), vec->end(), zone.Start(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r.Start(); } ); if( it != vec->begin() ) --it; - if( zone.GpuEnd() >= 0 && it->GpuStart() > zone.GpuEnd() ) break; + if( zone.End() >= 0 && it->Start() > zone.End() ) break; if( it == &zone ) return parent; if( it->Child() < 0 ) break; parent = it; @@ -419,9 +419,9 @@ const GpuEvent* View::GetZoneParent( const GpuEvent& zone ) const } else { - auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->GpuStart(); } ); + auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.Start(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->Start(); } ); if( it != timeline->begin() ) --it; - if( zone.GpuEnd() >= 0 && (*it)->GpuStart() > zone.GpuEnd() ) break; + if( zone.End() >= 0 && (*it)->Start() > zone.End() ) break; if( *it == &zone ) return parent; if( (*it)->Child() < 0 ) break; parent = *it; @@ -486,33 +486,33 @@ uint64_t View::GetZoneThread( const ZoneEvent& zone ) const return threadData ? 
threadData->id : 0; } -uint64_t View::GetZoneThread( const GpuEvent& zone ) const +uint64_t View::GetZoneThreadGPU( const EventAdapter& zone ) const { if( zone.Thread() == 0 ) { for( const auto& ctx : m_worker.GetGpuData() ) { if ( ctx->threadData.size() != 1 ) continue; - const Vector>* timeline = &ctx->threadData.begin()->second.timeline; + const Vector>* timeline = &ctx->threadData.begin()->second.timeline; if( timeline->empty() ) continue; for(;;) { if( timeline->is_magic() ) { - auto vec = (Vector*)timeline; - auto it = std::upper_bound( vec->begin(), vec->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r.GpuStart(); } ); + auto vec = (Vector*)timeline; + auto it = std::upper_bound( vec->begin(), vec->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r.Start(); } ); if( it != vec->begin() ) --it; - if( zone.GpuEnd() >= 0 && it->GpuStart() > zone.GpuEnd() ) break; - if( it == &zone ) return ctx->thread; + if( zone.GpuEnd() >= 0 && it->Start() > zone.GpuEnd() ) break; + if( it == &zone.event ) return ctx->thread; if( it->Child() < 0 ) break; timeline = &m_worker.GetGpuChildren( it->Child() ); } else { - auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->GpuStart(); } ); + auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->Start(); } ); if( it != timeline->begin() ) --it; - if( zone.GpuEnd() >= 0 && (*it)->GpuStart() > zone.GpuEnd() ) break; - if( *it == &zone ) return ctx->thread; + if( zone.GpuEnd() >= 0 && (*it)->Start() > zone.GpuEnd() ) break; + if( *it == &zone.event ) return ctx->thread; if( (*it)->Child() < 0 ) break; timeline = &m_worker.GetGpuChildren( (*it)->Child() ); } @@ -526,31 +526,31 @@ uint64_t View::GetZoneThread( const GpuEvent& zone ) const } } -const 
GpuCtxData* View::GetZoneCtx( const GpuEvent& zone ) const +const GpuCtxData* View::GetZoneCtx( const ZoneEvent& zone ) const { for( const auto& ctx : m_worker.GetGpuData() ) { for( const auto& td : ctx->threadData ) { - const Vector>* timeline = &td.second.timeline; + const Vector>* timeline = &td.second.timeline; if( timeline->empty() ) continue; for(;;) { if( timeline->is_magic() ) { - auto vec = (Vector*)timeline; - auto it = std::upper_bound( vec->begin(), vec->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r.GpuStart(); } ); + auto vec = (Vector*)timeline; + auto it = std::upper_bound( vec->begin(), vec->end(), zone.Start(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r.Start(); } ); if( it != vec->begin() ) --it; - if( zone.GpuEnd() >= 0 && it->GpuStart() > zone.GpuEnd() ) break; + if( zone.End() >= 0 && it->Start() > zone.End() ) break; if( it == &zone ) return ctx; if( it->Child() < 0 ) break; timeline = &m_worker.GetGpuChildren( it->Child() ); } else { - auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.GpuStart(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->GpuStart(); } ); + auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.Start(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->Start(); } ); if( it != timeline->begin() ) --it; - if( zone.GpuEnd() >= 0 && (*it)->GpuStart() > zone.GpuEnd() ) break; + if( zone.End() >= 0 && (*it)->Start() > zone.End() ) break; if( *it == &zone ) return ctx; if( (*it)->Child() < 0 ) break; timeline = &m_worker.GetGpuChildren( (*it)->Child() ); @@ -561,12 +561,12 @@ const GpuCtxData* View::GetZoneCtx( const GpuEvent& zone ) const return nullptr; } -int64_t View::GetZoneChildTime( const ZoneEvent& zone ) +int64_t View::GetZoneChildTime( const ZoneEvent& zone, bool gpu ) { int64_t time = 0; if( zone.HasChildren() ) { - auto& children = m_worker.GetZoneChildren( 
zone.Child() ); + auto& children = gpu ? m_worker.GetGpuChildren( zone.Child() ) : m_worker.GetZoneChildren( zone.Child() ); if( children.is_magic() ) { auto& vec = *(Vector*)&children; @@ -588,33 +588,6 @@ int64_t View::GetZoneChildTime( const ZoneEvent& zone ) return time; } -int64_t View::GetZoneChildTime( const GpuEvent& zone ) -{ - int64_t time = 0; - if( zone.Child() >= 0 ) - { - auto& children = m_worker.GetGpuChildren( zone.Child() ); - if( children.is_magic() ) - { - auto& vec = *(Vector*)&children; - for( auto& v : vec ) - { - const auto childSpan = std::max( int64_t( 0 ), v.GpuEnd() - v.GpuStart() ); - time += childSpan; - } - } - else - { - for( auto& v : children ) - { - const auto childSpan = std::max( int64_t( 0 ), v->GpuEnd() - v->GpuStart() ); - time += childSpan; - } - } - } - return time; -} - int64_t View::GetZoneChildTimeFast( const ZoneEvent& zone ) { int64_t time = 0; @@ -681,12 +654,12 @@ int64_t View::GetZoneChildTimeFastClamped( const ZoneEvent& zone, int64_t t0, in return time; } -int64_t View::GetZoneSelfTime( const ZoneEvent& zone ) +int64_t View::GetZoneSelfTime( const ZoneEvent& zone, bool gpu ) { if( m_cache.zoneSelfTime.first == &zone ) return m_cache.zoneSelfTime.second; if( m_cache.zoneSelfTime2.first == &zone ) return m_cache.zoneSelfTime2.second; - const auto ztime = m_worker.GetZoneEnd( zone ) - zone.Start(); - const auto selftime = ztime - GetZoneChildTime( zone ); + const auto ztime = (gpu ? 
m_worker.GetZoneEndGPU( zone ) : m_worker.GetZoneEnd( zone ) ) - zone.Start(); + const auto selftime = ztime - GetZoneChildTime( zone, gpu ); if( zone.IsEndValid() ) { m_cache.zoneSelfTime2 = m_cache.zoneSelfTime; @@ -695,20 +668,6 @@ int64_t View::GetZoneSelfTime( const ZoneEvent& zone ) return selftime; } -int64_t View::GetZoneSelfTime( const GpuEvent& zone ) -{ - if( m_cache.gpuSelfTime.first == &zone ) return m_cache.gpuSelfTime.second; - if( m_cache.gpuSelfTime2.first == &zone ) return m_cache.gpuSelfTime2.second; - const auto ztime = m_worker.GetZoneEnd( zone ) - zone.GpuStart(); - const auto selftime = ztime - GetZoneChildTime( zone ); - if( zone.GpuEnd() >= 0 ) - { - m_cache.gpuSelfTime2 = m_cache.gpuSelfTime; - m_cache.gpuSelfTime = std::make_pair( &zone, selftime ); - } - return selftime; -} - bool View::GetZoneRunningTime( const ContextSwitch* ctx, const ZoneEvent& ev, int64_t& time, uint64_t& cnt ) { auto it = std::lower_bound( ctx->v.begin(), ctx->v.end(), ev.Start(), [] ( const auto& l, const auto& r ) { return (uint64_t)l.End() < (uint64_t)r; } ); diff --git a/profiler/src/profiler/TracyView_ZoneInfo.cpp b/profiler/src/profiler/TracyView_ZoneInfo.cpp index f1b6d3fbfb..e501327933 100644 --- a/profiler/src/profiler/TracyView_ZoneInfo.cpp +++ b/profiler/src/profiler/TracyView_ZoneInfo.cpp @@ -22,12 +22,6 @@ inline uint32_t GetZoneCallstack( const ZoneEvent& ev, const Worker& return worker.GetZoneExtra( ev ).callstack.Val(); } -template<> -inline uint32_t GetZoneCallstack( const GpuEvent& ev, const Worker& worker ) -{ - return ev.callstack.Val(); -} - void View::CalcZoneTimeData( unordered_flat_map& data, int64_t& ztime, const ZoneEvent& zone ) { assert( zone.HasChildren() ); @@ -1439,7 +1433,7 @@ void View::DrawZoneInfoChildren( const V& children, int64_t ztime ) void View::DrawGpuInfoWindow() { - auto& ev = *m_gpuInfoWindow; + auto& ev = m_worker.GetGpuExtra(*m_gpuInfoWindow); const auto& srcloc = m_worker.GetSourceLocation( ev.SrcLoc() ); const auto 
scale = GetScale(); @@ -1450,9 +1444,9 @@ void View::DrawGpuInfoWindow() { if( ImGui::Button( ICON_FA_MICROSCOPE " Zoom to zone" ) ) { - ZoomToZone( ev ); + ZoomToZoneGPU( ev ); } - auto parent = GetZoneParent( ev ); + auto parent = GetZoneParentGPU( ev ); if( parent ) { ImGui::SameLine(); @@ -1507,7 +1501,7 @@ void View::DrawGpuInfoWindow() ImGui::Separator(); - const auto tid = GetZoneThread( ev ); + const auto tid = GetZoneThreadGPU( ev ); ImGui::PushFont( g_fonts.normal, FontBig ); TextFocusedClipboard( "Zone name:", m_worker.GetString( srcloc.name ), m_worker.GetString( srcloc.name ), 1, g_fonts.normal, FontNormal ); ImGui::SameLine(); @@ -1527,9 +1521,9 @@ void View::DrawGpuInfoWindow() ImGui::Separator(); ImGui::BeginChild( "##gpuinfo" ); - const auto end = m_worker.GetZoneEnd( ev ); + const auto end = m_worker.GetZoneEndGPU( ev ); const auto ztime = end - ev.GpuStart(); - const auto selftime = GetZoneSelfTime( ev ); + const auto selftime = GetZoneSelfTime( ev, true ); TextFocused( "Time from start of program:", TimeToStringExact( ev.GpuStart() ) ); TextFocused( "GPU execution time:", TimeToString( ztime ) ); TextFocused( "GPU self time:", TimeToString( selftime ) ); @@ -1553,11 +1547,11 @@ void View::DrawGpuInfoWindow() int64_t begin; if( td->second.timeline.is_magic() ) { - begin = ((Vector*)&td->second.timeline)->front().GpuStart(); + begin = ((Vector*)&td->second.timeline)->front().Start(); } else { - begin = td->second.timeline.front()->GpuStart(); + begin = td->second.timeline.front()->Start(); } const auto drift = GpuDrift( ctx ); TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.GpuStart(), begin, drift ) - ev.CpuStart() ) ); @@ -1580,14 +1574,14 @@ void View::DrawGpuInfoWindow() ImGui::Separator(); - std::vector zoneTrace; + std::vector zoneTrace; while( parent ) { zoneTrace.emplace_back( parent ); - parent = GetZoneParent( *parent ); + parent = GetZoneParentGPU( *parent ); } int idx = 0; - DrawZoneTrace( &ev, zoneTrace, m_worker, 
m_zoneinfoBuzzAnim, *this, m_showUnknownFrames, [&idx, this] ( const GpuEvent* v, int& fidx ) { + DrawZoneTrace( &ev.event, zoneTrace, m_worker, m_zoneinfoBuzzAnim, *this, m_showUnknownFrames, [&idx, this] ( const ZoneEvent* v, int& fidx ) { ImGui::TextDisabled( "%i.", fidx++ ); ImGui::SameLine(); const auto& srcloc = m_worker.GetSourceLocation( v->SrcLoc() ); @@ -1606,7 +1600,7 @@ void View::DrawGpuInfoWindow() { ImGui::SameLine(); } - ImGui::TextDisabled( "(%s) %s", TimeToString( m_worker.GetZoneEnd( *v ) - v->GpuStart() ), LocationToString( fileName, srcloc.line ) ); + ImGui::TextDisabled( "(%s) %s", TimeToString( m_worker.GetZoneEndGPU( *v ) - v->Start() ), LocationToString( fileName, srcloc.line ) ); ImGui::PopID(); if( ImGui::IsItemClicked( 1 ) ) { @@ -1628,9 +1622,9 @@ void View::DrawGpuInfoWindow() m_gpuHighlight = v; if( IsMouseClicked( 2 ) ) { - ZoomToZone( *v ); + ZoomToZoneGPU( *v ); } - ZoneTooltip( *v ); + ZoneTooltipGPU( *v ); } } ); @@ -1644,11 +1638,11 @@ void View::DrawGpuInfoWindow() { if( children.is_magic() ) { - DrawGpuInfoChildren>( *(Vector*)( &children ), ztime ); + DrawGpuInfoChildren>( *(Vector*)( &children ), ztime ); } else { - DrawGpuInfoChildren>( children, ztime ); + DrawGpuInfoChildren>( children, ztime ); } ImGui::TreePop(); } @@ -1689,8 +1683,8 @@ void View::DrawGpuInfoChildren( const V& children, int64_t ztime ) for( size_t i=0; isecond.timeline.is_magic() ) { - begin = ((Vector*)&td->second.timeline)->front().GpuStart(); + begin = ((Vector*)&td->second.timeline)->front().Start(); } else { - begin = td->second.timeline.front()->GpuStart(); + begin = td->second.timeline.front()->Start(); } const auto drift = GpuDrift( ctx ); TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.GpuStart(), begin, drift ) - ev.CpuStart() ) ); diff --git a/server/TracyEvent.hpp b/server/TracyEvent.hpp index b2276deece..39417a48e8 100644 --- a/server/TracyEvent.hpp +++ b/server/TracyEvent.hpp @@ -236,6 +236,13 @@ struct ZoneExtra enum { 
ZoneExtraSize = sizeof( ZoneExtra ) }; +struct GpuExtra : public ZoneExtra +{ + uint16_t query_id; + uint16_t thread; + Int48 otherStart; + Int48 otherEnd; +}; // This union exploits the fact that the current implementations of x64 and arm64 do not provide // full 64 bit address space. The high bits must be bit-extended, so 0x80... is an invalid pointer. @@ -389,36 +396,44 @@ struct LockHighlight bool blocked; }; - -struct GpuEvent -{ - tracy_force_inline int64_t CpuStart() const { return int64_t( _cpuStart_srcloc ) >> 16; } - tracy_force_inline void SetCpuStart( int64_t cpuStart ) { assert( cpuStart < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_cpuStart_srcloc)+2, &cpuStart, 4 ); memcpy( ((char*)&_cpuStart_srcloc)+6, ((char*)&cpuStart)+4, 2 ); } - tracy_force_inline int64_t CpuEnd() const { return int64_t( _cpuEnd_thread ) >> 16; } - tracy_force_inline void SetCpuEnd( int64_t cpuEnd ) { assert( cpuEnd < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_cpuEnd_thread)+2, &cpuEnd, 4 ); memcpy( ((char*)&_cpuEnd_thread)+6, ((char*)&cpuEnd)+4, 2 ); } - tracy_force_inline int64_t GpuStart() const { return int64_t( _gpuStart_child1 ) >> 16; } - tracy_force_inline void SetGpuStart( int64_t gpuStart ) { /*assert( gpuStart < (int64_t)( 1ull << 47 ) );*/ memcpy( ((char*)&_gpuStart_child1)+2, &gpuStart, 4 ); memcpy( ((char*)&_gpuStart_child1)+6, ((char*)&gpuStart)+4, 2 ); } - tracy_force_inline int64_t GpuEnd() const { return int64_t( _gpuEnd_child2 ) >> 16; } - tracy_force_inline void SetGpuEnd( int64_t gpuEnd ) { assert( gpuEnd < (int64_t)( 1ull << 47 ) ); memcpy( ((char*)&_gpuEnd_child2)+2, &gpuEnd, 4 ); memcpy( ((char*)&_gpuEnd_child2)+6, ((char*)&gpuEnd)+4, 2 ); } - tracy_force_inline int16_t SrcLoc() const { return int16_t( _cpuStart_srcloc & 0xFFFF ); } - tracy_force_inline void SetSrcLoc( int16_t srcloc ) { memcpy( &_cpuStart_srcloc, &srcloc, 2 ); } - tracy_force_inline uint16_t Thread() const { return uint16_t( _cpuEnd_thread & 0xFFFF ); } - tracy_force_inline void 
SetThread( uint16_t thread ) { memcpy( &_cpuEnd_thread, &thread, 2 ); } - tracy_force_inline int32_t Child() const { return int32_t( uint32_t( _gpuStart_child1 & 0xFFFF ) | ( uint32_t( _gpuEnd_child2 & 0xFFFF ) << 16 ) ); } - tracy_force_inline void SetChild( int32_t child ) { memcpy( &_gpuStart_child1, &child, 2 ); memcpy( &_gpuEnd_child2, ((char*)&child)+2, 2 ); } - - uint64_t _cpuStart_srcloc; - uint64_t _cpuEnd_thread; - uint64_t _gpuStart_child1; - uint64_t _gpuEnd_child2; - Int24 callstack; - uint16_t query_id; +// Makes a ZoneEvent and GpuExtra look like the old GpuEvent, for syntactical convenience. Not for +// bulk data structures. Template parameter `is_const` makes the members const (otherwise the +// adapter can't be constructed from const references). +template +struct EventAdapter +{ + using event_type = std::conditional::type; + using extra_type = std::conditional::type; + tracy_force_inline EventAdapter( event_type& ev, extra_type& ex ) : event( ev ) , extra( ex ) , thread( ex.thread ) , callstack( ex.callstack ) , query_id( ex.query_id ) {} + + // GpuEvent compatibility functions + tracy_force_inline int64_t CpuStart() const { return extra.otherStart.Val(); } + tracy_force_inline void SetCpuStart( int64_t cpuStart ) { assert( cpuStart < (int64_t)( 1ull << 47 ) ); extra.otherStart.SetVal( cpuStart ); } + tracy_force_inline int64_t CpuEnd() const { return extra.otherEnd.Val(); } + tracy_force_inline void SetCpuEnd( int64_t cpuEnd ) { assert( cpuEnd < (int64_t)( 1ull << 47 ) ); extra.otherEnd.SetVal( cpuEnd ); } + tracy_force_inline int64_t GpuStart() const { return event.Start(); } + tracy_force_inline void SetGpuStart( int64_t gpuStart ) { event.SetStart( gpuStart ); } + tracy_force_inline int64_t GpuEnd() const { return event.End(); } + tracy_force_inline void SetGpuEnd( int64_t gpuEnd ) { event.SetEnd( gpuEnd ); } + tracy_force_inline int16_t SrcLoc() const { return event.SrcLoc(); } + tracy_force_inline void SetSrcLoc( int16_t srcloc ) { 
event.SetSrcLoc( srcloc ); } + tracy_force_inline uint16_t Thread() const { return extra.thread; } + tracy_force_inline void SetThread( uint16_t thread ) { extra.thread = thread; } + tracy_force_inline int32_t Child() const { return event.Child(); } + tracy_force_inline void SetChild( int32_t child ) { event.SetChild( child ); } + + tracy_force_inline operator short_ptr() { return &event; } + tracy_force_inline operator event_type*() { return &event; } + tracy_force_inline operator event_type&() { return event; } + tracy_force_inline operator event_type&() const { return event; } + tracy_force_inline EventAdapter* operator->() { return this; } + event_type& event; + extra_type& extra; + std::conditional::type&, decltype( extra.thread )&>::type thread; + std::conditional::type&, decltype( extra.callstack )&>::type callstack; + std::conditional::type&, decltype( extra.query_id )&>::type query_id; }; -enum { GpuEventSize = sizeof( GpuEvent ) }; -static_assert( std::is_standard_layout::value, "GpuEvent is not standard layout" ); - - struct MemEvent { tracy_force_inline uint64_t Ptr() const { return uint64_t( int64_t( _ptr_csalloc1 ) >> 8 ); } @@ -754,8 +769,8 @@ struct ThreadData struct GpuCtxThreadData { - Vector> timeline; - Vector> stack; + Vector> timeline; + Vector> stack; }; struct GpuCtxData @@ -777,7 +792,7 @@ struct GpuCtxData unordered_flat_map threadData; unordered_flat_map noteNames; unordered_flat_map> notes; - short_ptr query[64*1024]; + short_ptr query[64*1024]; }; enum { GpuCtxDataSize = sizeof( GpuCtxData ) }; diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index 483e3233d0..0269b19b39 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -280,6 +280,7 @@ Worker::Worker( const char* addr, uint16_t port, int64_t memoryLimit ) m_data.localThreadCompress.InitZero(); m_data.callstackPayload.push_back( nullptr ); m_data.zoneExtra.push_back( ZoneExtra {} ); + m_data.gpuExtra.push_back( GpuExtra {} ); 
m_data.symbolLocInline.push_back( std::numeric_limits::max() ); m_data.memory = m_slab.AllocInit(); m_data.memNameMap.emplace( 0, m_data.memory ); @@ -319,6 +320,7 @@ Worker::Worker( const char* name, const char* program, const std::vector::max() ); m_data.memory = m_slab.AllocInit(); m_data.memNameMap.emplace( 0, m_data.memory ); @@ -864,7 +866,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow int16_t id; uint64_t cnt; f.Read2( id, cnt ); - auto status = m_data.gpuSourceLocationZones.emplace( id, GpuSourceLocationZones() ); + auto status = m_data.gpuSourceLocationZones.emplace( id, SourceLocationZones() ); assert( status.second ); status.first->second.zones.reserve( cnt ); } @@ -1096,10 +1098,11 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow s_loadProgress.subProgress.store( 0, std::memory_order_relaxed ); f.Read( sz ); m_data.gpuChildren.reserve_exact( sz, m_slab ); - memset( (char*)m_data.gpuChildren.data(), 0, sizeof( Vector> ) * sz ); + memset( (char*)m_data.gpuChildren.data(), 0, sizeof( Vector> ) * sz ); childIdx = 0; f.Read( sz ); m_data.gpuData.reserve_exact( sz, m_slab ); + m_data.gpuExtra.push_back( GpuExtra {} ); for( uint64_t i=0; i(); @@ -1749,15 +1752,15 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow m_data.sourceLocationZonesReady = true; } ) ); - std::function>&, uint16_t)> ProcessTimelineGpu; - ProcessTimelineGpu = [this, &ProcessTimelineGpu] ( Vector>& _vec, uint16_t thread ) + std::function>&, uint16_t)> ProcessTimelineGpu; + ProcessTimelineGpu = [this, &ProcessTimelineGpu] ( Vector>& _vec, uint16_t thread ) { if( m_shutdown.load( std::memory_order_relaxed ) ) return; assert( _vec.is_magic() ); - auto& vec = *(Vector*)( &_vec ); + auto& vec = *(Vector*)( &_vec ); for( auto& zone : vec ) { - if( zone.GpuEnd() >= 0 ) ReconstructZoneStatistics( zone, thread ); + if( zone.End() >= 0 ) ReconstructZoneStatistics( zone, thread ); if( 
zone.Child() >= 0 ) { ProcessTimelineGpu( GetGpuChildrenMutable( zone.Child() ), thread ); @@ -2356,14 +2359,14 @@ const uint64_t* Worker::GetInlineSymbolList( uint64_t sym, uint32_t len ) return it; } -int64_t Worker::GetZoneEndImpl( const ZoneEvent& ev ) +int64_t Worker::GetZoneEndImpl( const ZoneEvent& ev, const Vector>>& childArray ) { assert( !ev.IsEndValid() ); auto ptr = &ev; for(;;) { - if( !ptr->HasChildren() ) return ptr->Start(); - auto& children = GetZoneChildren( ptr->Child() ); + if( !ptr->HasChildren() ) return ptr->Start() >= 0 ? ptr->Start() : m_data.lastTime; + auto& children = childArray[ ptr->Child() ]; if( children.is_magic() ) { auto& c = *(Vector*)&children; @@ -2377,27 +2380,6 @@ int64_t Worker::GetZoneEndImpl( const ZoneEvent& ev ) } } -int64_t Worker::GetZoneEndImpl( const GpuEvent& ev ) -{ - assert( ev.GpuEnd() < 0 ); - auto ptr = &ev; - for(;;) - { - if( ptr->Child() < 0 ) return ptr->GpuStart() >= 0 ? ptr->GpuStart() : m_data.lastTime; - auto& children = GetGpuChildren( ptr->Child() ); - if( children.is_magic() ) - { - auto& c = *(Vector*)&children; - ptr = &c.back(); - } - else - { - ptr = children.back(); - } - if( ptr->GpuEnd() >= 0 ) return ptr->GpuEnd(); - } -} - uint32_t Worker::FindStringIdx( const char* str ) const { if( !str ) return 0; @@ -2574,12 +2556,6 @@ const char* Worker::GetZoneName( const ZoneEvent& ev, const SourceLocation& srcl } } -const char* Worker::GetZoneName( const GpuEvent& ev ) const -{ - auto& srcloc = GetSourceLocation( ev.SrcLoc() ); - return GetZoneName( srcloc ); -} - static bool strstr_nocase( const char* l, const char* r ) { const auto lsz = strlen( l ); @@ -3507,12 +3483,12 @@ Worker::SourceLocationZones* Worker::GetSourceLocationZonesReal( uint16_t srcloc return &it->second; } -Worker::GpuSourceLocationZones* Worker::GetGpuSourceLocationZonesReal( uint16_t srcloc ) +Worker::SourceLocationZones* Worker::GetGpuSourceLocationZonesReal( uint16_t srcloc ) { auto it = m_data.gpuSourceLocationZones.find( 
srcloc ); if( it == m_data.gpuSourceLocationZones.end() ) { - it = m_data.gpuSourceLocationZones.emplace( srcloc, GpuSourceLocationZones() ).first; + it = m_data.gpuSourceLocationZones.emplace( srcloc, SourceLocationZones() ).first; } m_data.gpuZonesLast.first = srcloc; m_data.gpuZonesLast.second = &it->second; @@ -5761,14 +5737,30 @@ void Worker::ProcessGpuNewContext( const QueueGpuNewContext& ev ) m_gpuCtxMap[ev.context] = gpu; } -void Worker::ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& ev, bool serial ) +GpuExtra& Worker::AllocGpuExtra( ZoneEvent& ev ) +{ + assert( ev.extra == 0 ); + ev.extra = uint32_t( m_data.gpuExtra.size() ); + auto& extra = m_data.gpuExtra.push_next(); + memset( (char*)&extra, 0, sizeof( extra ) ); + return extra; +} + +EventAdapter Worker::AllocGpuEvent() +{ + auto zone = AllocZoneEvent(); + auto& extra = AllocGpuExtra(*zone); + return { *zone, extra }; +}; + +void Worker::ProcessGpuZoneBeginImpl( EventAdapter zone, const QueueGpuZoneBegin& ev, bool serial ) { CheckSourceLocation( ev.srcloc ); zone->SetSrcLoc( ShrinkSourceLocation( ev.srcloc ) ); ProcessGpuZoneBeginImplCommon( zone, ev, serial ); } -void Worker::ProcessGpuZoneBeginAllocSrcLocImpl( GpuEvent* zone, const QueueGpuZoneBeginLean& ev, bool serial ) +void Worker::ProcessGpuZoneBeginAllocSrcLocImpl( EventAdapter zone, const QueueGpuZoneBeginLean& ev, bool serial ) { assert( m_pendingSourceLocationPayload != 0 ); zone->SetSrcLoc( m_pendingSourceLocationPayload ); @@ -5776,7 +5768,7 @@ void Worker::ProcessGpuZoneBeginAllocSrcLocImpl( GpuEvent* zone, const QueueGpuZ m_pendingSourceLocationPayload = 0; } -void Worker::ProcessGpuZoneBeginImplCommon( GpuEvent* zone, const QueueGpuZoneBeginLean& ev, bool serial ) +void Worker::ProcessGpuZoneBeginImplCommon( EventAdapter zone, const QueueGpuZoneBeginLean& ev, bool serial ) { m_data.gpuCnt++; @@ -5792,12 +5784,13 @@ void Worker::ProcessGpuZoneBeginImplCommon( GpuEvent* zone, const QueueGpuZoneBe { cpuTime = RefTime( 
m_refTimeThread, ev.cpuTime ); } + const auto time = TscTime( cpuTime ); zone->SetCpuStart( time ); zone->SetCpuEnd( -1 ); zone->SetGpuStart( -1 ); zone->SetGpuEnd( -1 ); - zone->callstack.SetVal( 0 ); + zone->callstack.SetVal(0); zone->SetChild( -1 ); zone->query_id = ev.queryId; @@ -5831,7 +5824,7 @@ void Worker::ProcessGpuZoneBeginImplCommon( GpuEvent* zone, const QueueGpuZoneBe if( back->Child() < 0 ) { back->SetChild( int32_t( m_data.gpuChildren.size() ) ); - m_data.gpuChildren.push_back( Vector>() ); + m_data.gpuChildren.push_back( Vector>() ); } timeline = &m_data.gpuChildren[back->Child()]; } @@ -5845,13 +5838,13 @@ void Worker::ProcessGpuZoneBeginImplCommon( GpuEvent* zone, const QueueGpuZoneBe void Worker::ProcessGpuZoneBegin( const QueueGpuZoneBegin& ev, bool serial ) { - auto zone = m_slab.Alloc(); + auto zone = AllocGpuEvent(); ProcessGpuZoneBeginImpl( zone, ev, serial ); } void Worker::ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev, bool serial ) { - auto zone = m_slab.Alloc(); + auto zone = AllocGpuEvent(); ProcessGpuZoneBeginImpl( zone, ev, serial ); if( serial ) { @@ -5871,13 +5864,13 @@ void Worker::ProcessGpuZoneBeginCallstack( const QueueGpuZoneBegin& ev, bool ser void Worker::ProcessGpuZoneBeginAllocSrcLoc( const QueueGpuZoneBeginLean& ev, bool serial ) { - auto zone = m_slab.Alloc(); + auto zone = AllocGpuEvent(); ProcessGpuZoneBeginAllocSrcLocImpl( zone, ev, serial ); } void Worker::ProcessGpuZoneBeginAllocSrcLocCallstack( const QueueGpuZoneBeginLean& ev, bool serial ) { - auto zone = m_slab.Alloc(); + auto zone = AllocGpuEvent(); ProcessGpuZoneBeginAllocSrcLocImpl( zone, ev, serial ); if( serial ) { @@ -5919,7 +5912,7 @@ void Worker::ProcessGpuZoneEnd( const QueueGpuZoneEnd& ev, bool serial ) cpuTime = RefTime( m_refTimeThread, ev.cpuTime ); } const auto time = TscTime( cpuTime ); - zone->SetCpuEnd( time ); + GetGpuExtraMutable(*zone).SetCpuEnd(time); if( m_data.lastTime < time ) m_data.lastTime = time; } @@ -5967,8 +5960,9 @@ 
void Worker::ProcessGpuTime( const QueueGpuTime& ev ) } } - auto zone = ctx->query[ev.queryId]; - assert( zone ); + auto evt = ctx->query[ev.queryId]; + assert( evt ); + auto zone = GetGpuExtraMutable(*evt); ctx->query[ev.queryId] = nullptr; if( zone->GpuStart() < 0 ) @@ -5984,7 +5978,7 @@ void Worker::ProcessGpuTime( const QueueGpuTime& ev ) const auto timeSpan = gpuTime - gpuStart; if( timeSpan > 0 ) { - GpuZoneThreadData ztd; + ZoneThreadData ztd; ztd.SetZone( zone ); ztd.SetThread( zone->Thread() ); auto slz = GetGpuSourceLocationZones( zone->SrcLoc() ); @@ -7661,14 +7655,14 @@ int64_t Worker::ReadTimelineHaveSize( FileRead& f, ZoneEvent* zone, int64_t refT } } -void Worker::ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, bool hasQueryId ) +void Worker::ReadTimeline( FileRead& f, ZoneEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, bool hasQueryId ) { uint64_t sz; f.Read( sz ); ReadTimelineHaveSize( f, zone, refTime, refGpuTime, childIdx, sz, hasQueryId ); } -void Worker::ReadTimelineHaveSize( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, uint64_t sz, bool hasQueryId ) +void Worker::ReadTimelineHaveSize( FileRead& f, ZoneEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, uint64_t sz, bool hasQueryId ) { if( sz == 0 ) { @@ -7740,18 +7734,18 @@ void Worker::ReconstructZoneStatistics( uint8_t* countMap, ZoneEvent& zone, uint } } -void Worker::ReconstructZoneStatistics( GpuEvent& zone, uint16_t thread ) +void Worker::ReconstructZoneStatistics( ZoneEvent& zone, uint16_t thread ) { - assert( zone.GpuEnd() >= 0 ); - auto timeSpan = zone.GpuEnd() - zone.GpuStart(); + assert( zone.End() >= 0 ); + auto timeSpan = zone.End() - zone.Start(); if( timeSpan > 0 ) { auto it = m_data.gpuSourceLocationZones.find( zone.SrcLoc() ); if( it == m_data.gpuSourceLocationZones.end() ) { - it = m_data.gpuSourceLocationZones.emplace( zone.SrcLoc(), 
GpuSourceLocationZones {} ).first; + it = m_data.gpuSourceLocationZones.emplace( zone.SrcLoc(), SourceLocationZones {} ).first; } - GpuZoneThreadData ztd; + ZoneThreadData ztd; ztd.SetZone( &zone ); ztd.SetThread( thread ); auto& slz = it->second; @@ -7768,12 +7762,6 @@ void Worker::CountZoneStatistics( ZoneEvent* zone ) auto cnt = GetSourceLocationZonesCnt( zone->SrcLoc() ); (*cnt)++; } - -void Worker::CountZoneStatistics( GpuEvent* zone ) -{ - auto cnt = GetGpuSourceLocationZonesCnt( zone->SrcLoc() ); - (*cnt)++; -} #endif int64_t Worker::ReadTimeline( FileRead& f, Vector>& _vec, uint32_t size, int64_t refTime, int32_t& childIdx ) @@ -7821,15 +7809,15 @@ int64_t Worker::ReadTimeline( FileRead& f, Vector>& _vec, u return refTime; } -void Worker::ReadTimeline( FileRead& f, Vector>& _vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, bool hasQueryId ) +void Worker::ReadTimeline( FileRead& f, Vector>& _vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, bool hasQueryId ) { assert( size != 0 ); const auto lp = s_loadProgress.subProgress.load( std::memory_order_relaxed ); s_loadProgress.subProgress.store( lp + size, std::memory_order_relaxed ); - auto& vec = *(Vector*)( &_vec ); + auto& vec = *(Vector*)( &_vec ); vec.set_magic(); vec.reserve_exact( size, m_slab ); - auto zone = vec.begin(); + auto zonePtr = vec.begin(); auto end = vec.end(); do { @@ -7837,6 +7825,7 @@ void Worker::ReadTimeline( FileRead& f, Vector>& _vec, uint6 int16_t srcloc; uint16_t thread; uint64_t childSz; + auto zone = EventAdapter(*zonePtr, AllocGpuExtra(*zonePtr)); f.Read6( tcpu, tgpu, srcloc, zone->callstack, thread, childSz ); zone->SetSrcLoc( srcloc ); zone->SetThread( thread ); @@ -7854,7 +7843,7 @@ void Worker::ReadTimeline( FileRead& f, Vector>& _vec, uint6 zone->SetGpuEnd( refGpuTime ); if( hasQueryId ) f.Read( zone->query_id ); } - while( ++zone != end ); + while( ++zonePtr != end ); } void Worker::Disconnect() @@ -8560,17 +8549,17 @@ 
void Worker::WriteTimelineImpl( FileWrite& f, const V& vec, int64_t& refTime ) } } -void Worker::WriteTimeline( FileWrite& f, const Vector>& vec, int64_t& refTime, int64_t& refGpuTime ) +void Worker::WriteTimeline( FileWrite& f, const Vector>& vec, int64_t& refTime, int64_t& refGpuTime ) { uint64_t sz = vec.size(); f.Write( &sz, sizeof( sz ) ); if( vec.is_magic() ) { - WriteTimelineImpl>( f, *(Vector*)( &vec ), refTime, refGpuTime ); + WriteTimelineImpl>( f, *(Vector*)( &vec ), refTime, refGpuTime ); } else { - WriteTimelineImpl>( f, vec, refTime, refGpuTime ); + WriteTimelineImpl>( f, vec, refTime, refGpuTime ); } } @@ -8580,7 +8569,7 @@ void Worker::WriteTimelineImpl( FileWrite& f, const V& vec, int64_t& refTime, in Adapter a; for( auto& val : vec ) { - auto& v = a(val); + auto& v = GetGpuExtra(a(val)); WriteTimeOffset( f, refTime, v.CpuStart() ); WriteTimeOffset( f, refGpuTime, v.GpuStart() ); const int16_t srcloc = v.SrcLoc(); diff --git a/server/TracyWorker.hpp b/server/TracyWorker.hpp index 2aa09a0eb3..8686a1dacf 100644 --- a/server/TracyWorker.hpp +++ b/server/TracyWorker.hpp @@ -134,17 +134,6 @@ class Worker }; enum { ZoneThreadDataSize = sizeof( ZoneThreadData ) }; - struct GpuZoneThreadData - { - tracy_force_inline GpuEvent* Zone() const { return (GpuEvent*)( _zone_thread >> 16 ); } - tracy_force_inline void SetZone( GpuEvent* zone ) { auto z64 = (uint64_t)zone; assert( ( z64 & 0xFFFF000000000000 ) == 0 ); memcpy( ((char*)&_zone_thread)+2, &z64, 4 ); memcpy( ((char*)&_zone_thread)+6, ((char*)&z64)+4, 2 ); } - tracy_force_inline uint16_t Thread() const { return uint16_t( _zone_thread & 0xFFFF ); } - tracy_force_inline void SetThread( uint16_t thread ) { memcpy( &_zone_thread, &thread, 2 ); } - - uint64_t _zone_thread; - }; - enum { GpuZoneThreadDataSize = sizeof( GpuZoneThreadData ) }; - struct CpuThreadTopology { uint32_t package; @@ -215,17 +204,6 @@ class Worker unordered_flat_map threadCnt; }; - struct GpuSourceLocationZones - { - struct GpuZtdSort { 
bool operator()( const GpuZoneThreadData& lhs, const GpuZoneThreadData& rhs ) const { return lhs.Zone()->GpuStart() < rhs.Zone()->GpuStart(); } }; - - SortedVector zones; - int64_t min = std::numeric_limits::max(); - int64_t max = std::numeric_limits::min(); - int64_t total = 0; - double sumSq = 0; - }; - struct CallstackFrameIdHash { size_t operator()( const CallstackFrameId& id ) const { return id.data; } @@ -287,6 +265,7 @@ class Worker StringDiscovery plots; Vector threads; Vector zoneExtra; + Vector gpuExtra; MemData* memory; unordered_flat_map memNameMap; uint64_t zonesCnt = 0; @@ -313,7 +292,7 @@ class Worker #ifndef TRACY_NO_STATISTICS unordered_flat_map sourceLocationZones; bool sourceLocationZonesReady = false; - unordered_flat_map gpuSourceLocationZones; + unordered_flat_map gpuSourceLocationZones; bool gpuSourceLocationZonesReady = false; #else unordered_flat_map sourceLocationZonesCnt; @@ -357,7 +336,7 @@ class Worker ThreadCompress externalThreadCompress; Vector>> zoneChildren; - Vector>> gpuChildren; + Vector>> gpuChildren; #ifndef TRACY_NO_STATISTICS Vector> ghostChildren; Vector ghostFrames; @@ -384,7 +363,7 @@ class Worker std::pair shrinkSrclocLast = std::make_pair( std::numeric_limits::max(), 0 ); #ifndef TRACY_NO_STATISTICS std::pair srclocZonesLast = std::make_pair( 0, nullptr ); - std::pair gpuZonesLast = std::make_pair( 0, nullptr ); + std::pair gpuZonesLast = std::make_pair( 0, nullptr ); #else std::pair srclocCntLast = std::make_pair( 0, nullptr ); std::pair gpuCntLast = std::make_pair( 0, nullptr ); @@ -599,10 +578,9 @@ class Worker // GetZoneEnd() will try to infer the end time by looking at child zones (parent zone can't end // before its children have ended). // GetZoneEndDirect() will only return zone's direct timing data, without looking at children. - tracy_force_inline int64_t GetZoneEnd( const ZoneEvent& ev ) { return ev.IsEndValid() ? 
ev.End() : GetZoneEndImpl( ev ); } - tracy_force_inline int64_t GetZoneEnd( const GpuEvent& ev ) { return ev.GpuEnd() >= 0 ? ev.GpuEnd() : GetZoneEndImpl( ev ); } + tracy_force_inline int64_t GetZoneEnd( const ZoneEvent& ev ) { return ev.IsEndValid() ? ev.End() : GetZoneEndImpl( ev, m_data.zoneChildren ); } + tracy_force_inline int64_t GetZoneEndGPU( const ZoneEvent& ev ) { return ev.IsEndValid() ? ev.End() : GetZoneEndImpl( ev, m_data.gpuChildren ); } static tracy_force_inline int64_t GetZoneEndDirect( const ZoneEvent& ev ) { return ev.IsEndValid() ? ev.End() : ev.Start(); } - static tracy_force_inline int64_t GetZoneEndDirect( const GpuEvent& ev ) { return ev.GpuEnd() >= 0 ? ev.GpuEnd() : ev.GpuStart(); } uint32_t FindStringIdx( const char* str ) const; const char* GetString( uint64_t ptr ) const; @@ -617,10 +595,9 @@ class Worker const char* GetZoneName( const SourceLocation& srcloc ) const; const char* GetZoneName( const ZoneEvent& ev ) const; const char* GetZoneName( const ZoneEvent& ev, const SourceLocation& srcloc ) const; - const char* GetZoneName( const GpuEvent& ev ) const; tracy_force_inline const Vector>& GetZoneChildren( int32_t idx ) const { return m_data.zoneChildren[idx]; } - tracy_force_inline const Vector>& GetGpuChildren( int32_t idx ) const { return m_data.gpuChildren[idx]; } + tracy_force_inline const Vector>& GetGpuChildren( int32_t idx ) const { return m_data.gpuChildren[idx]; } #ifndef TRACY_NO_STATISTICS tracy_force_inline const Vector& GetGhostChildren( int32_t idx ) const { return m_data.ghostChildren[idx]; } tracy_force_inline const GhostKey& GetGhostFrame( const Int24& frame ) const { return m_data.ghostFrames[frame.Val()]; } @@ -628,6 +605,7 @@ class Worker tracy_force_inline const bool HasZoneExtra( const ZoneEvent& ev ) const { return ev.extra != 0; } tracy_force_inline const ZoneExtra& GetZoneExtra( const ZoneEvent& ev ) const { return m_data.zoneExtra[ev.extra]; } + tracy_force_inline const EventAdapter GetGpuExtra( const 
ZoneEvent& ev ) const { return { ev, m_data.gpuExtra[ev.extra] }; } std::vector GetMatchingSourceLocation( const char* query, bool ignoreCase ) const; @@ -637,7 +615,7 @@ class Worker SourceLocationZones& GetZonesForSourceLocation( int16_t srcloc ); const SourceLocationZones& GetZonesForSourceLocation( int16_t srcloc ) const; const unordered_flat_map& GetSourceLocationZones() const { return m_data.sourceLocationZones; } - const unordered_flat_map& GetGpuSourceLocationZones() const { return m_data.gpuSourceLocationZones; } + const unordered_flat_map& GetGpuSourceLocationZones() const { return m_data.gpuSourceLocationZones; } bool AreSourceLocationZonesReady() const { return m_data.sourceLocationZonesReady; } bool AreGpuSourceLocationZonesReady() const { return m_data.gpuSourceLocationZonesReady; } bool IsCpuUsageReady() const { return m_data.ctxUsageReady; } @@ -810,9 +788,11 @@ class Worker tracy_force_inline ZoneEvent* AllocZoneEvent(); tracy_force_inline void ProcessZoneBeginImpl( ZoneEvent* zone, const QueueZoneBegin& ev ); tracy_force_inline void ProcessZoneBeginAllocSrcLocImpl( ZoneEvent* zone, const QueueZoneBeginLean& ev ); - tracy_force_inline void ProcessGpuZoneBeginImpl( GpuEvent* zone, const QueueGpuZoneBegin& ev, bool serial ); - tracy_force_inline void ProcessGpuZoneBeginAllocSrcLocImpl( GpuEvent* zone, const QueueGpuZoneBeginLean& ev, bool serial ); - tracy_force_inline void ProcessGpuZoneBeginImplCommon( GpuEvent* zone, const QueueGpuZoneBeginLean& ev, bool serial ); + tracy_force_inline GpuExtra& AllocGpuExtra( ZoneEvent& ev ); + tracy_force_inline EventAdapter AllocGpuEvent(); + tracy_force_inline void ProcessGpuZoneBeginImpl( EventAdapter zone, const QueueGpuZoneBegin& ev, bool serial ); + tracy_force_inline void ProcessGpuZoneBeginAllocSrcLocImpl( EventAdapter zone, const QueueGpuZoneBeginLean& ev, bool serial ); + tracy_force_inline void ProcessGpuZoneBeginImplCommon( EventAdapter zone, const QueueGpuZoneBeginLean& ev, bool serial ); 
tracy_force_inline void ProcessPlotDataImpl( uint64_t name, int64_t evTime, double val ); tracy_force_inline MemEvent* ProcessMemAllocImpl( MemData& memdata, const QueueMemAlloc& ev ); tracy_force_inline MemEvent* ProcessMemFreeImpl( MemData& memdata, const QueueMemFree& ev ); @@ -876,12 +856,12 @@ class Worker } SourceLocationZones* GetSourceLocationZonesReal( uint16_t srcloc ); - GpuSourceLocationZones* GetGpuSourceLocationZones( uint16_t srcloc ) + SourceLocationZones* GetGpuSourceLocationZones( uint16_t srcloc ) { if( m_data.gpuZonesLast.first == srcloc ) return m_data.gpuZonesLast.second; return GetGpuSourceLocationZonesReal( srcloc ); } - GpuSourceLocationZones* GetGpuSourceLocationZonesReal( uint16_t srcloc ); + SourceLocationZones* GetGpuSourceLocationZonesReal( uint16_t srcloc ); #else uint64_t* GetSourceLocationZonesCnt( uint16_t srcloc ) { @@ -948,7 +928,7 @@ class Worker void CacheSourceFromFile( const char* fn ); tracy_force_inline Vector>& GetZoneChildrenMutable( int32_t idx ) { return m_data.zoneChildren[idx]; } - tracy_force_inline Vector>& GetGpuChildrenMutable( int32_t idx ) { return m_data.gpuChildren[idx]; } + tracy_force_inline Vector>& GetGpuChildrenMutable( int32_t idx ) { return m_data.gpuChildren[idx]; } #ifndef TRACY_NO_STATISTICS tracy_force_inline Vector& GetGhostChildrenMutable( int32_t idx ) { return m_data.ghostChildren[idx]; } #endif @@ -964,31 +944,30 @@ class Worker tracy_force_inline int64_t ReadTimeline( FileRead& f, ZoneEvent* zone, int64_t refTime, int32_t& childIdx ); tracy_force_inline int64_t ReadTimelineHaveSize( FileRead& f, ZoneEvent* zone, int64_t refTime, int32_t& childIdx, uint32_t sz ); - tracy_force_inline void ReadTimeline( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, bool hasQueryId ); - tracy_force_inline void ReadTimelineHaveSize( FileRead& f, GpuEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, uint64_t sz, bool hasQueryId ); + tracy_force_inline 
void ReadTimeline( FileRead& f, ZoneEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, bool hasQueryId ); + tracy_force_inline void ReadTimelineHaveSize( FileRead& f, ZoneEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, uint64_t sz, bool hasQueryId ); #ifndef TRACY_NO_STATISTICS tracy_force_inline void ReconstructZoneStatistics( uint8_t* countMap, ZoneEvent& zone, uint16_t thread ); - tracy_force_inline void ReconstructZoneStatistics( GpuEvent& zone, uint16_t thread ); + tracy_force_inline void ReconstructZoneStatistics( ZoneEvent& zone, uint16_t thread ); #else tracy_force_inline void CountZoneStatistics( ZoneEvent* zone ); - tracy_force_inline void CountZoneStatistics( GpuEvent* zone ); #endif tracy_force_inline ZoneExtra& GetZoneExtraMutable( const ZoneEvent& ev ) { return m_data.zoneExtra[ev.extra]; } + tracy_force_inline EventAdapter GetGpuExtraMutable( ZoneEvent& ev ) { return {ev, m_data.gpuExtra[ev.extra]}; } tracy_force_inline ZoneExtra& AllocZoneExtra( ZoneEvent& ev ); tracy_force_inline ZoneExtra& RequestZoneExtra( ZoneEvent& ev ); - int64_t GetZoneEndImpl( const ZoneEvent& ev ); - int64_t GetZoneEndImpl( const GpuEvent& ev ); + int64_t GetZoneEndImpl( const ZoneEvent& ev, const Vector>>& childArray ); void UpdateMbps( int64_t td ); int64_t ReadTimeline( FileRead& f, Vector>& vec, uint32_t size, int64_t refTime, int32_t& childIdx ); - void ReadTimeline( FileRead& f, Vector>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, bool hasQueryId ); + void ReadTimeline( FileRead& f, Vector>& vec, uint64_t size, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, bool hasQueryId ); tracy_force_inline void WriteTimeline( FileWrite& f, const Vector>& vec, int64_t& refTime ); - tracy_force_inline void WriteTimeline( FileWrite& f, const Vector>& vec, int64_t& refTime, int64_t& refGpuTime ); + tracy_force_inline void WriteTimeline( FileWrite& f, const Vector>& vec, int64_t& refTime, int64_t& 
refGpuTime ); template void WriteTimelineImpl( FileWrite& f, const V& vec, int64_t& refTime ); template From eaa7f8060671291b75a3c089503f78bd56d1fb52 Mon Sep 17 00:00:00 2001 From: Eric Eaton Date: Tue, 7 Oct 2025 01:02:22 -0700 Subject: [PATCH 2/5] Eliminate the function GetZoneCtx The function GetZoneCtx isn't needed. This patch introduces a small structure to help transport the context around to avoid the expensive search. The GPU timeline functions now take the context as a parameter instead of the thread ID, since the supplied thread ID is derived from the context in every invocation. --- profiler/src/profiler/TracyView.hpp | 21 ++--- .../src/profiler/TracyView_GpuTimeline.cpp | 37 ++++---- .../src/profiler/TracyView_Navigation.cpp | 5 +- profiler/src/profiler/TracyView_Timeline.cpp | 2 +- profiler/src/profiler/TracyView_Utility.cpp | 92 ++++++------------- profiler/src/profiler/TracyView_ZoneInfo.cpp | 62 ++++++------- server/TracyEvent.hpp | 11 +++ 7 files changed, 102 insertions(+), 128 deletions(-) diff --git a/profiler/src/profiler/TracyView.hpp b/profiler/src/profiler/TracyView.hpp index f8a2b37c4e..0fa49a520a 100644 --- a/profiler/src/profiler/TracyView.hpp +++ b/profiler/src/profiler/TracyView.hpp @@ -255,11 +255,11 @@ class View void DrawZoneList( const TimelineContext& ctx, const std::vector& drawList, int offset, uint64_t tid, int maxDepth, double margin ); void DrawThreadCropper( const int depth, const uint64_t tid, const float xPos, const float yPos, const float ostep, const float cropperWidth, const bool hasCtxSwitches ); void DrawContextSwitchList( const TimelineContext& ctx, const std::vector& drawList, const Vector& ctxSwitch, int offset, int endOffset, bool isFiber ); - int DispatchGpuZoneLevel( const Vector>& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int offset, int depth, uint64_t thread, float yMin, float yMax, int64_t begin, int drift ); + int DispatchGpuZoneLevel( const Vector>& vec, bool hover, double pxns, int64_t
nspx, const ImVec2& wpos, int offset, int depth, const GpuCtxData* ctx, float yMin, float yMax, int64_t begin, int drift ); template - int DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int offset, int depth, uint64_t thread, float yMin, float yMax, int64_t begin, int drift ); + int DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int offset, int depth, const GpuCtxData* ctx, float yMin, float yMax, int64_t begin, int drift ); template - int SkipGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int offset, int depth, uint64_t thread, float yMin, float yMax, int64_t begin, int drift ); + int SkipGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int offset, int depth, const GpuCtxData* ctx, float yMin, float yMax, int64_t begin, int drift ); void DrawLockHeader( uint32_t id, const LockMap& lockmap, const SourceLocation& srcloc, bool hover, ImDrawList* draw, const ImVec2& wpos, float w, float ty, float offset, uint8_t tid ); int DrawLocks( const TimelineContext& ctx, const std::vector>& lockDraw, uint64_t tid, int _offset, LockHighlight& highlight ); void DrawPlotPoint( const ImVec2& wpos, float x, float y, int offset, uint32_t color, bool hover, bool hasPrev, const PlotItem& item, double prev, PlotType type, PlotValueFormatting format, float PlotHeight, uint64_t name ); @@ -320,7 +320,7 @@ class View template void DrawZoneInfoChildren( const V& children, int64_t ztime ); template - void DrawGpuInfoChildren( const V& children, int64_t ztime ); + void DrawGpuInfoChildren( const V& children, int64_t ztime, const GpuCtxData* ctx ); void HandleRange( Range& range, int64_t timespan, const ImVec2& wpos, float w ); void HandleTimelineMouse( int64_t timespan, const ImVec2& wpos, float w ); @@ -338,16 +338,16 @@ class View ZoneColorData GetZoneColorData( const ZoneEvent& ev ); void ZoomToZone( const ZoneEvent& ev ); - 
void ZoomToZoneGPU( const ZoneEvent& ev ); + void ZoomToZoneGPU( const ZoneEventC ev ); void ZoomToPrevFrame(); void ZoomToNextFrame(); void CenterAtTime( int64_t t ); void ShowZoneInfo( const ZoneEvent& ev ); - void ShowZoneInfo( const ZoneEvent& ev, uint64_t thread ); + void ShowZoneInfo( const ZoneEventC ev, uint64_t thread ); void ZoneTooltip( const ZoneEvent& ev ); - void ZoneTooltipGPU( const ZoneEvent& ev ); + void ZoneTooltipGPU( const ZoneEventC ev ); void CallstackTooltip( uint32_t idx ); void CallstackTooltipContents( uint32_t idx ); void CrashTooltip(); @@ -357,11 +357,10 @@ class View const ZoneEvent* GetZoneChild( const ZoneEvent& zone, int64_t time ) const; bool IsZoneReentry( const ZoneEvent& zone ) const; bool IsZoneReentry( const ZoneEvent& zone, uint64_t tid ) const; - const ZoneEvent* GetZoneParentGPU( const ZoneEvent& zone ) const; + ZoneEventC GetZoneParentGPU( const ZoneEventC zone ) const; const ThreadData* GetZoneThreadData( const ZoneEvent& zone ) const; uint64_t GetZoneThread( const ZoneEvent& zone ) const; uint64_t GetZoneThreadGPU( const EventAdapter& zone ) const; - const GpuCtxData* GetZoneCtx( const ZoneEvent& zone ) const; bool FindMatchingZone( int prev0, int prev1, int flags ); const ZoneEvent* FindZoneAtTime( uint64_t thread, int64_t time ) const; uint64_t GetFrameNumber( const FrameData& fd, int i ) const; @@ -477,7 +476,7 @@ class View DecayValue m_msgHighlight = nullptr; DecayValue m_lockHoverHighlight = InvalidId; DecayValue m_msgToFocus = nullptr; - const ZoneEvent* m_gpuInfoWindow = nullptr; + ZoneEventC m_gpuInfoWindow = {nullptr, nullptr}; const ZoneEvent* m_gpuHighlight; uint64_t m_gpuInfoWindowThread; uint32_t m_callstackInfoWindow = 0; @@ -573,7 +572,7 @@ class View BuzzAnim m_statBuzzAnim; Vector m_zoneInfoStack; - Vector m_gpuInfoStack; + Vector m_gpuInfoStack; SourceContents m_srcHintCache; std::unique_ptr m_sourceView; diff --git a/profiler/src/profiler/TracyView_GpuTimeline.cpp 
b/profiler/src/profiler/TracyView_GpuTimeline.cpp index 08ba7af269..b7ea3303f4 100644 --- a/profiler/src/profiler/TracyView_GpuTimeline.cpp +++ b/profiler/src/profiler/TracyView_GpuTimeline.cpp @@ -46,7 +46,7 @@ bool View::DrawGpu( const TimelineContext& ctx, const GpuCtxData& gpu, int& offs const auto begin = tlm.front().Start(); const auto drift = GpuDrift( &gpu ); if( !singleThread ) offset += sstep; - const auto partDepth = DispatchGpuZoneLevel( tl, hover, pxns, int64_t( nspx ), wpos, offset, 0, gpu.thread, yMin, yMax, begin, drift ); + const auto partDepth = DispatchGpuZoneLevel( tl, hover, pxns, int64_t( nspx ), wpos, offset, 0, &gpu, yMin, yMax, begin, drift ); if( partDepth != 0 ) { if( !singleThread ) @@ -73,7 +73,7 @@ bool View::DrawGpu( const TimelineContext& ctx, const GpuCtxData& gpu, int& offs const auto begin = tl.front()->Start(); const auto drift = GpuDrift( &gpu ); if( !singleThread ) offset += sstep; - const auto partDepth = DispatchGpuZoneLevel( tl, hover, pxns, int64_t( nspx ), wpos, offset, 0, gpu.thread, yMin, yMax, begin, drift ); + const auto partDepth = DispatchGpuZoneLevel( tl, hover, pxns, int64_t( nspx ), wpos, offset, 0, &gpu, yMin, yMax, begin, drift ); if( partDepth != 0 ) { if( !singleThread ) @@ -97,7 +97,7 @@ bool View::DrawGpu( const TimelineContext& ctx, const GpuCtxData& gpu, int& offs return depth != 0; } -int View::DispatchGpuZoneLevel( const Vector>& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int _offset, int depth, uint64_t thread, float yMin, float yMax, int64_t begin, int drift ) +int View::DispatchGpuZoneLevel( const Vector>& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int _offset, int depth, const GpuCtxData *ctx, float yMin, float yMax, int64_t begin, int drift ) { const auto ty = ImGui::GetTextLineHeight(); const auto ostep = ty + 1; @@ -108,28 +108,28 @@ int View::DispatchGpuZoneLevel( const Vector>& vec, bool ho { if( vec.is_magic() ) { - return DrawGpuZoneLevel>( 
*(Vector*)&vec, hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); + return DrawGpuZoneLevel>( *(Vector*)&vec, hover, pxns, nspx, wpos, _offset, depth, ctx, yMin, yMax, begin, drift ); } else { - return DrawGpuZoneLevel>( vec, hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); + return DrawGpuZoneLevel>( vec, hover, pxns, nspx, wpos, _offset, depth, ctx, yMin, yMax, begin, drift ); } } else { if( vec.is_magic() ) { - return SkipGpuZoneLevel>( *(Vector*)&vec, hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); + return SkipGpuZoneLevel>( *(Vector*)&vec, hover, pxns, nspx, wpos, _offset, depth, ctx, yMin, yMax, begin, drift ); } else { - return SkipGpuZoneLevel>( vec, hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); + return SkipGpuZoneLevel>( vec, hover, pxns, nspx, wpos, _offset, depth, ctx, yMin, yMax, begin, drift ); } } } template -int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int _offset, int depth, uint64_t thread, float yMin, float yMax, int64_t begin, int drift ) +int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int _offset, int depth, const GpuCtxData* ctx, float yMin, float yMax, int64_t begin, int drift ) { // cast to uint64_t, so that unended zones (end = -1) are still drawn auto it = std::lower_bound( vec.begin(), vec.end(), std::max( 0, m_vd.zvStart ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).End(), begin, drift ) < (uint64_t)r; } ); @@ -159,6 +159,7 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const auto start = AdjustGpuTime( ev.GpuStart(), begin, drift ); end = AdjustGpuTime( end, begin, drift ); const auto zsz = std::max( ( end - start ) * pxns, pxns * 0.5 ); + const auto zoneThread = ctx->thread != 0 ? 
ctx->thread : m_worker.DecompressThread( ev.Thread() ); if( zsz < MinVisSize ) { const auto color = GetZoneColor( ev ); @@ -202,16 +203,15 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, } else { - const auto zoneThread = thread != 0 ? thread : m_worker.DecompressThread( ev.Thread() ); - ZoneTooltipGPU( ev ); + ZoneTooltipGPU( { &ev.event, ctx } ); if( IsMouseClicked( 2 ) && rend - start > 0 ) { - ZoomToZoneGPU( ev ); + ZoomToZoneGPU( { &ev.event, ctx } ); } if( IsMouseClicked( 0 ) ) { - ShowZoneInfo( ev, zoneThread ); + ShowZoneInfo( { &ev.event, ctx }, zoneThread ); } m_gpuThread = zoneThread; @@ -231,7 +231,7 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, { if( ev.Child() >= 0 ) { - const auto d = DispatchGpuZoneLevel( m_worker.GetGpuChildren( ev.Child() ), hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); + const auto d = DispatchGpuZoneLevel( m_worker.GetGpuChildren( ev.Child() ), hover, pxns, nspx, wpos, _offset, depth, ctx, yMin, yMax, begin, drift ); if( d > maxdepth ) maxdepth = d; } @@ -288,16 +288,15 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, if( hover && ImGui::IsMouseHoveringRect( wpos + ImVec2( px0, offset ), wpos + ImVec2( px1, offset + tsz.y + 1 ) ) ) { - const auto zoneThread = thread != 0 ? 
thread : m_worker.DecompressThread( ev.thread ); - ZoneTooltipGPU( ev ); + ZoneTooltipGPU( { &ev.event, ctx } ); if( !m_zoomAnim.active && IsMouseClicked( 2 ) ) { - ZoomToZoneGPU( ev ); + ZoomToZoneGPU( { &ev.event, ctx } ); } if( IsMouseClicked( 0 ) ) { - ShowZoneInfo( ev, zoneThread ); + ShowZoneInfo( { &ev.event, ctx }, zoneThread ); } m_gpuThread = zoneThread; @@ -312,7 +311,7 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, } template -int View::SkipGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int _offset, int depth, uint64_t thread, float yMin, float yMax, int64_t begin, int drift ) +int View::SkipGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const ImVec2& wpos, int _offset, int depth, const GpuCtxData* ctx, float yMin, float yMax, int64_t begin, int drift ) { // cast to uint64_t, so that unended zones (end = -1) are still drawn auto it = std::lower_bound( vec.begin(), vec.end(), std::max( 0, m_vd.zvStart ), [begin, drift] ( const auto& l, const auto& r ) { Adapter a; return (uint64_t)AdjustGpuTime( a(l).End(), begin, drift ) < (uint64_t)r; } ); @@ -357,7 +356,7 @@ int View::SkipGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, { if( ev.Child() >= 0 ) { - const auto d = DispatchGpuZoneLevel( m_worker.GetGpuChildren( ev.Child() ), hover, pxns, nspx, wpos, _offset, depth, thread, yMin, yMax, begin, drift ); + const auto d = DispatchGpuZoneLevel( m_worker.GetGpuChildren( ev.Child() ), hover, pxns, nspx, wpos, _offset, depth, ctx, yMin, yMax, begin, drift ); if( d > maxdepth ) maxdepth = d; } ++it; diff --git a/profiler/src/profiler/TracyView_Navigation.cpp b/profiler/src/profiler/TracyView_Navigation.cpp index 25fa85fb8f..64d0792d1e 100644 --- a/profiler/src/profiler/TracyView_Navigation.cpp +++ b/profiler/src/profiler/TracyView_Navigation.cpp @@ -10,11 +10,12 @@ void View::ZoomToZone( const ZoneEvent& ev ) ZoomToRange( ev.Start(), end ); } -void 
View::ZoomToZoneGPU( const ZoneEvent& ev ) +void View::ZoomToZoneGPU( const ZoneEventC evC ) { + auto& ev = *evC.event; + auto ctx = evC.ctx; const auto end = m_worker.GetZoneEndGPU( ev ); if( end - ev.Start() <= 0 ) return; - auto ctx = GetZoneCtx( ev ); if( !ctx ) { ZoomToRange( ev.Start(), end ); diff --git a/profiler/src/profiler/TracyView_Timeline.cpp b/profiler/src/profiler/TracyView_Timeline.cpp index 26de28a3c1..95bcfbc2c1 100644 --- a/profiler/src/profiler/TracyView_Timeline.cpp +++ b/profiler/src/profiler/TracyView_Timeline.cpp @@ -473,7 +473,7 @@ void View::DrawTimeline() } if( m_gpuInfoWindow ) { - const auto ex = m_worker.GetGpuExtra( *m_gpuInfoWindow ); + const auto ex = m_worker.GetGpuExtra( *m_gpuInfoWindow.event ); const auto px0 = ( ex.CpuStart() - m_vd.zvStart ) * pxns; const auto px1 = std::max( px0 + std::max( 1.0, pxns * 0.5 ), ( ex.CpuEnd() - m_vd.zvStart ) * pxns ); draw->AddRectFilled( ImVec2( wpos.x + px0, linepos.y ), ImVec2( wpos.x + px1, linepos.y + lineh ), 0x2288DD88 ); diff --git a/profiler/src/profiler/TracyView_Utility.cpp b/profiler/src/profiler/TracyView_Utility.cpp index 7ced6ab75d..03f1c6db90 100644 --- a/profiler/src/profiler/TracyView_Utility.cpp +++ b/profiler/src/profiler/TracyView_Utility.cpp @@ -395,42 +395,41 @@ bool View::IsZoneReentry( const ZoneEvent& zone, uint64_t tid ) const return false; } -const ZoneEvent* View::GetZoneParentGPU( const ZoneEvent& zone ) const +ZoneEventC View::GetZoneParentGPU( const ZoneEventC zoneC ) const { - for( const auto& ctx : m_worker.GetGpuData() ) + auto& zone = *zoneC.event; + auto ctx = zoneC.ctx; + for( const auto& td : ctx->threadData ) { - for( const auto& td : ctx->threadData ) + const ZoneEvent* parent = nullptr; + const Vector>* timeline = &td.second.timeline; + if( timeline->empty() ) continue; + for(;;) { - const ZoneEvent* parent = nullptr; - const Vector>* timeline = &td.second.timeline; - if( timeline->empty() ) continue; - for(;;) + if( timeline->is_magic() ) { - if( 
timeline->is_magic() ) - { - auto vec = (Vector*)timeline; - auto it = std::upper_bound( vec->begin(), vec->end(), zone.Start(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r.Start(); } ); - if( it != vec->begin() ) --it; - if( zone.End() >= 0 && it->Start() > zone.End() ) break; - if( it == &zone ) return parent; - if( it->Child() < 0 ) break; - parent = it; - timeline = &m_worker.GetGpuChildren( parent->Child() ); - } - else - { - auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.Start(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->Start(); } ); - if( it != timeline->begin() ) --it; - if( zone.End() >= 0 && (*it)->Start() > zone.End() ) break; - if( *it == &zone ) return parent; - if( (*it)->Child() < 0 ) break; - parent = *it; - timeline = &m_worker.GetGpuChildren( parent->Child() ); - } + auto vec = (Vector*)timeline; + auto it = std::upper_bound( vec->begin(), vec->end(), zone.Start(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r.Start(); } ); + if( it != vec->begin() ) --it; + if( zone.End() >= 0 && it->Start() > zone.End() ) break; + if( it == &zone ) return { parent, ctx }; + if( it->Child() < 0 ) break; + parent = it; + timeline = &m_worker.GetGpuChildren( parent->Child() ); + } + else + { + auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.Start(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->Start(); } ); + if( it != timeline->begin() ) --it; + if( zone.End() >= 0 && (*it)->Start() > zone.End() ) break; + if( *it == &zone ) return { parent, ctx }; + if( (*it)->Child() < 0 ) break; + parent = *it; + timeline = &m_worker.GetGpuChildren( parent->Child() ); } } } - return nullptr; + return { nullptr, nullptr }; } const ThreadData* View::GetZoneThreadData( const ZoneEvent& zone ) const @@ -526,41 +525,6 @@ uint64_t View::GetZoneThreadGPU( const EventAdapter& zone ) const } } -const GpuCtxData* View::GetZoneCtx( const 
ZoneEvent& zone ) const -{ - for( const auto& ctx : m_worker.GetGpuData() ) - { - for( const auto& td : ctx->threadData ) - { - const Vector>* timeline = &td.second.timeline; - if( timeline->empty() ) continue; - for(;;) - { - if( timeline->is_magic() ) - { - auto vec = (Vector*)timeline; - auto it = std::upper_bound( vec->begin(), vec->end(), zone.Start(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r.Start(); } ); - if( it != vec->begin() ) --it; - if( zone.End() >= 0 && it->Start() > zone.End() ) break; - if( it == &zone ) return ctx; - if( it->Child() < 0 ) break; - timeline = &m_worker.GetGpuChildren( it->Child() ); - } - else - { - auto it = std::upper_bound( timeline->begin(), timeline->end(), zone.Start(), [] ( const auto& l, const auto& r ) { return (uint64_t)l < (uint64_t)r->Start(); } ); - if( it != timeline->begin() ) --it; - if( zone.End() >= 0 && (*it)->Start() > zone.End() ) break; - if( *it == &zone ) return ctx; - if( (*it)->Child() < 0 ) break; - timeline = &m_worker.GetGpuChildren( (*it)->Child() ); - } - } - } - } - return nullptr; -} - int64_t View::GetZoneChildTime( const ZoneEvent& zone, bool gpu ) { int64_t time = 0; diff --git a/profiler/src/profiler/TracyView_ZoneInfo.cpp b/profiler/src/profiler/TracyView_ZoneInfo.cpp index e501327933..345c6ea29e 100644 --- a/profiler/src/profiler/TracyView_ZoneInfo.cpp +++ b/profiler/src/profiler/TracyView_ZoneInfo.cpp @@ -1433,7 +1433,8 @@ void View::DrawZoneInfoChildren( const V& children, int64_t ztime ) void View::DrawGpuInfoWindow() { - auto& ev = m_worker.GetGpuExtra(*m_gpuInfoWindow); + auto& ev = m_worker.GetGpuExtra(*m_gpuInfoWindow.event); + auto ctx = m_gpuInfoWindow.ctx; const auto& srcloc = m_worker.GetSourceLocation( ev.SrcLoc() ); const auto scale = GetScale(); @@ -1444,15 +1445,15 @@ void View::DrawGpuInfoWindow() { if( ImGui::Button( ICON_FA_MICROSCOPE " Zoom to zone" ) ) { - ZoomToZoneGPU( ev ); + ZoomToZoneGPU( m_gpuInfoWindow ); } - auto parent = 
GetZoneParentGPU( ev ); + auto parent = GetZoneParentGPU( m_gpuInfoWindow ); if( parent ) { ImGui::SameLine(); if( ImGui::Button( ICON_FA_ARROW_UP " Go to parent" ) ) { - ShowZoneInfo( *parent, m_gpuInfoWindowThread ); + ShowZoneInfo( parent, m_gpuInfoWindowThread ); } } if( ev.callstack.Val() != 0 ) @@ -1535,7 +1536,6 @@ void View::DrawGpuInfoWindow() TextDisabledUnformatted( buf ); } TextFocused( "CPU command setup time:", TimeToString( ev.CpuEnd() - ev.CpuStart() ) ); - auto ctx = GetZoneCtx( ev ); if( !ctx ) { TextFocused( "Delay to execution:", TimeToString( ev.GpuStart() - ev.CpuStart() ) ); @@ -1577,11 +1577,11 @@ void View::DrawGpuInfoWindow() std::vector zoneTrace; while( parent ) { - zoneTrace.emplace_back( parent ); - parent = GetZoneParentGPU( *parent ); + zoneTrace.emplace_back( parent.event ); + parent = GetZoneParentGPU( parent ); } int idx = 0; - DrawZoneTrace( &ev.event, zoneTrace, m_worker, m_zoneinfoBuzzAnim, *this, m_showUnknownFrames, [&idx, this] ( const ZoneEvent* v, int& fidx ) { + DrawZoneTrace( &ev.event, zoneTrace, m_worker, m_zoneinfoBuzzAnim, *this, m_showUnknownFrames, [&idx, this, ctx] ( const ZoneEvent* v, int& fidx ) { ImGui::TextDisabled( "%i.", fidx++ ); ImGui::SameLine(); const auto& srcloc = m_worker.GetSourceLocation( v->SrcLoc() ); @@ -1615,16 +1615,16 @@ void View::DrawGpuInfoWindow() } if( sel ) { - ShowZoneInfo( *v, m_gpuInfoWindowThread ); + ShowZoneInfo( { v, ctx }, m_gpuInfoWindowThread ); } if( hover ) { m_gpuHighlight = v; if( IsMouseClicked( 2 ) ) { - ZoomToZoneGPU( *v ); + ZoomToZoneGPU( { v, ctx } ); } - ZoneTooltipGPU( *v ); + ZoneTooltipGPU( { v, ctx } ); } } ); @@ -1638,11 +1638,11 @@ void View::DrawGpuInfoWindow() { if( children.is_magic() ) { - DrawGpuInfoChildren>( *(Vector*)( &children ), ztime ); + DrawGpuInfoChildren>( *(Vector*)( &children ), ztime, ctx ); } else { - DrawGpuInfoChildren>( children, ztime ); + DrawGpuInfoChildren>( children, ztime, ctx ); } ImGui::TreePop(); } @@ -1654,13 +1654,13 @@ void 
View::DrawGpuInfoWindow() if( !show ) { - m_gpuInfoWindow = nullptr; + m_gpuInfoWindow = { nullptr, nullptr }; m_gpuInfoStack.clear(); } } template -void View::DrawGpuInfoChildren( const V& children, int64_t ztime ) +void View::DrawGpuInfoChildren( const V& children, int64_t ztime, const GpuCtxData* ctx ) { Adapter a; const auto rztime = 1.0 / ztime; @@ -1729,16 +1729,16 @@ void View::DrawGpuInfoChildren( const V& children, int64_t ztime ) ImGui::TreeNodeEx( txt, ImGuiTreeNodeFlags_Leaf | ImGuiTreeNodeFlags_NoTreePushOnOpen ); if( ImGui::IsItemClicked() ) { - ShowZoneInfo( cev, m_gpuInfoWindowThread ); + ShowZoneInfo( { &cev, ctx }, m_gpuInfoWindowThread ); } if( ImGui::IsItemHovered() ) { m_gpuHighlight = &cev; if( IsMouseClicked( 2 ) ) { - ZoomToZoneGPU( cev ); + ZoomToZoneGPU( { &cev, ctx } ); } - ZoneTooltipGPU( cev ); + ZoneTooltipGPU( { &cev, ctx } ); } ImGui::PopID(); } @@ -1792,16 +1792,16 @@ void View::DrawGpuInfoChildren( const V& children, int64_t ztime ) ImGui::PushID( (int)cgr.v[cti[i]] ); if( ImGui::Selectable( txt, &b, ImGuiSelectableFlags_SpanAllColumns ) ) { - ShowZoneInfo( cev, m_gpuInfoWindowThread ); + ShowZoneInfo( { &cev, ctx }, m_gpuInfoWindowThread ); } if( ImGui::IsItemHovered() ) { m_gpuHighlight = &cev; if( IsMouseClicked( 2 ) ) { - ZoomToZoneGPU( cev ); + ZoomToZoneGPU( { &cev, ctx } ); } - ZoneTooltipGPU( cev ); + ZoneTooltipGPU( { &cev, ctx } ); } ImGui::PopID(); ImGui::Unindent(); @@ -1848,16 +1848,16 @@ void View::DrawGpuInfoChildren( const V& children, int64_t ztime ) ImGui::PushID( (int)i ); if( ImGui::Selectable( m_worker.GetZoneName( cev ), &b, ImGuiSelectableFlags_SpanAllColumns ) ) { - ShowZoneInfo( cev, m_gpuInfoWindowThread ); + ShowZoneInfo( { &cev, ctx }, m_gpuInfoWindowThread ); } if( ImGui::IsItemHovered() ) { m_gpuHighlight = &cev; if( IsMouseClicked( 2 ) ) { - ZoomToZoneGPU( cev ); + ZoomToZoneGPU( { &cev, ctx } ); } - ZoneTooltipGPU( cev ); + ZoneTooltipGPU( { &cev, ctx } ); } ImGui::PopID(); ImGui::NextColumn(); @@ 
-1881,18 +1881,18 @@ void View::ShowZoneInfo( const ZoneEvent& ev ) if( m_gpuInfoWindow ) { - m_gpuInfoWindow = nullptr; + m_gpuInfoWindow = { nullptr, nullptr }; m_gpuInfoStack.clear(); } } -void View::ShowZoneInfo( const ZoneEvent& ev, uint64_t thread ) +void View::ShowZoneInfo( const ZoneEventC ev, uint64_t thread ) { - if( m_gpuInfoWindow && m_gpuInfoWindow != &ev ) + if( m_gpuInfoWindow && m_gpuInfoWindow != ev ) { m_gpuInfoStack.push_back( m_gpuInfoWindow ); } - m_gpuInfoWindow = &ev; + m_gpuInfoWindow = ev; m_gpuInfoWindowThread = thread; if( m_zoneInfoWindow ) @@ -1981,9 +1981,10 @@ void View::ZoneTooltip( const ZoneEvent& ev ) ImGui::EndTooltip(); } -void View::ZoneTooltipGPU( const ZoneEvent& evt ) +void View::ZoneTooltipGPU( const ZoneEventC evtC ) { - const auto& ev = m_worker.GetGpuExtra(evt); + const auto& ev = m_worker.GetGpuExtra(*evtC.event); + auto ctx = evtC.ctx; const auto tid = GetZoneThreadGPU( ev ); const auto& srcloc = m_worker.GetSourceLocation( ev.SrcLoc() ); const auto end = m_worker.GetZoneEndGPU( ev ); @@ -2018,7 +2019,6 @@ void View::ZoneTooltipGPU( const ZoneEvent& evt ) TextDisabledUnformatted( buf ); } TextFocused( "CPU command setup time:", TimeToString( ev.CpuEnd() - ev.CpuStart() ) ); - auto ctx = GetZoneCtx( ev ); if( !ctx ) { TextFocused( "Delay to execution:", TimeToString( ev.GpuStart() - ev.CpuStart() ) ); diff --git a/server/TracyEvent.hpp b/server/TracyEvent.hpp index 39417a48e8..1510003d07 100644 --- a/server/TracyEvent.hpp +++ b/server/TracyEvent.hpp @@ -797,6 +797,17 @@ struct GpuCtxData enum { GpuCtxDataSize = sizeof( GpuCtxData ) }; +struct ZoneEventC +{ + tracy_force_inline ZoneEventC(const ZoneEvent* event, const GpuCtxData* ctx): event(event), ctx(ctx) {} + + tracy_force_inline operator bool() const { return event != nullptr; } + tracy_force_inline bool operator==( const ZoneEventC& other ) const { return other.event == event; } + tracy_force_inline bool operator==( const ZoneEvent* other ) const { return other == 
event; } + + const ZoneEvent* event; + const GpuCtxData* ctx; +}; enum class PlotType : uint8_t { From ff4b18fee00793016a65be0e9407a48d142fb33d Mon Sep 17 00:00:00 2001 From: Eric Eaton Date: Tue, 7 Oct 2025 13:32:40 -0700 Subject: [PATCH 3/5] Merge the GetZoneColor functions This change has CPU and GPU use the same code to calculate zone colors. This changes the UI because the GPU zones will have a different color. The color of GPU zones will be set to match the CPU thread making the GPU calls. --- profiler/src/profiler/TracyView.hpp | 6 +-- .../src/profiler/TracyView_GpuTimeline.cpp | 4 +- profiler/src/profiler/TracyView_Utility.cpp | 44 +++---------------- .../src/profiler/TracyView_ZoneTimeline.cpp | 2 +- server/TracyEvent.hpp | 8 ++++ server/TracyWorker.cpp | 2 +- server/TracyWorker.hpp | 4 +- 7 files changed, 22 insertions(+), 48 deletions(-) diff --git a/profiler/src/profiler/TracyView.hpp b/profiler/src/profiler/TracyView.hpp index 0fa49a520a..12a70ebba1 100644 --- a/profiler/src/profiler/TracyView.hpp +++ b/profiler/src/profiler/TracyView.hpp @@ -332,10 +332,8 @@ class View uint32_t GetThreadColor( uint64_t thread, int depth ); uint32_t GetSrcLocColor( const SourceLocation& srcloc, int depth ); uint32_t GetRawSrcLocColor( const SourceLocation& srcloc, int depth ); - uint32_t GetZoneColor( const ZoneEvent& ev, uint64_t thread, int depth ); - uint32_t GetZoneColor( const ZoneEvent& ev ); - ZoneColorData GetZoneColorData( const ZoneEvent& ev, uint64_t thread, int depth, uint32_t inheritedColor ); - ZoneColorData GetZoneColorData( const ZoneEvent& ev ); + uint32_t GetZoneColor( const ZoneEventC ev, uint64_t thread, int depth ); + ZoneColorData GetZoneColorData( const ZoneEventC ev, uint64_t thread, int depth, uint32_t inheritedColor ); void ZoomToZone( const ZoneEvent& ev ); void ZoomToZoneGPU( const ZoneEventC ev ); diff --git a/profiler/src/profiler/TracyView_GpuTimeline.cpp b/profiler/src/profiler/TracyView_GpuTimeline.cpp index b7ea3303f4..9b25ca8212 
100644 --- a/profiler/src/profiler/TracyView_GpuTimeline.cpp +++ b/profiler/src/profiler/TracyView_GpuTimeline.cpp @@ -162,7 +162,7 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const auto zoneThread = ctx->thread != 0 ? ctx->thread : m_worker.DecompressThread( ev.Thread() ); if( zsz < MinVisSize ) { - const auto color = GetZoneColor( ev ); + const auto color = GetZoneColor( { &ev.event, ctx }, zoneThread, depth ); const auto MinVisNs = MinVisSize * nspx; int num = 0; const auto px0 = ( start - m_vd.zvStart ) * pxns; @@ -242,7 +242,7 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, const auto pr1 = ( end - m_vd.zvStart ) * pxns; const auto px0 = std::max( pr0, -10.0 ); const auto px1 = std::max( { std::min( pr1, double( w + 10 ) ), px0 + pxns * 0.5, px0 + MinVisSize } ); - const auto zoneColor = GetZoneColorData( ev ); + const auto zoneColor = GetZoneColorData( { &ev.event, ctx }, zoneThread, depth, 0 ); draw->AddRectFilled( wpos + ImVec2( px0, offset ), wpos + ImVec2( px1, offset + tsz.y ), zoneColor.color ); if( zoneColor.highlight ) { diff --git a/profiler/src/profiler/TracyView_Utility.cpp b/profiler/src/profiler/TracyView_Utility.cpp index 03f1c6db90..bf5f962a09 100644 --- a/profiler/src/profiler/TracyView_Utility.cpp +++ b/profiler/src/profiler/TracyView_Utility.cpp @@ -49,13 +49,13 @@ uint32_t View::GetSrcLocColor( const SourceLocation& srcloc, int depth ) return GetRawSrcLocColor( srcloc, depth ); } -uint32_t View::GetZoneColor( const ZoneEvent& ev, uint64_t thread, int depth ) +uint32_t View::GetZoneColor( const ZoneEventC ev, uint64_t thread, int depth ) { const auto sl = ev.SrcLoc(); const auto& srcloc = m_worker.GetSourceLocation( sl ); if( !m_vd.forceColors ) { - if( m_worker.HasZoneExtra( ev ) ) + if( m_worker.HasZoneExtra( *ev.event ) ) { const auto custom_color = m_worker.GetZoneExtra( ev ).color.Val(); if( custom_color != 0 ) return custom_color | 0xFF000000; @@ -77,25 +77,18 
@@ uint32_t View::GetZoneColor( const ZoneEvent& ev, uint64_t thread, int depth ) } } -uint32_t View::GetZoneColor( const ZoneEvent& ev ) -{ - const auto& srcloc = m_worker.GetSourceLocation( ev.SrcLoc() ); - const auto color = srcloc.color; - return color != 0 ? ( color | 0xFF000000 ) : 0xFF222288; -} - -View::ZoneColorData View::GetZoneColorData( const ZoneEvent& ev, uint64_t thread, int depth, uint32_t inheritedColor ) +View::ZoneColorData View::GetZoneColorData( const ZoneEventC ev, uint64_t thread, int depth, uint32_t inheritedColor ) { ZoneColorData ret; const auto& srcloc = ev.SrcLoc(); - if( m_zoneInfoWindow == &ev ) + if( m_zoneInfoWindow == ev.event ) { ret.color = inheritedColor ? inheritedColor : GetZoneColor( ev, thread, depth ); ret.accentColor = 0xFF44DD44; ret.thickness = 3.f; ret.highlight = true; } - else if( m_zoneHighlight == &ev ) + else if( m_zoneHighlight == ev.event ) { ret.color = inheritedColor ? inheritedColor : GetZoneColor( ev, thread, depth ); ret.accentColor = 0xFF4444FF; @@ -136,33 +129,6 @@ View::ZoneColorData View::GetZoneColorData( const ZoneEvent& ev, uint64_t thread return ret; } -View::ZoneColorData View::GetZoneColorData( const ZoneEvent& ev ) -{ - ZoneColorData ret; - const auto color = GetZoneColor( ev ); - ret.color = color; - if( m_gpuInfoWindow == &ev ) - { - ret.accentColor = 0xFF44DD44; - ret.thickness = 3.f; - ret.highlight = true; - } - else if( m_gpuHighlight == &ev ) - { - ret.accentColor = 0xFF4444FF; - ret.thickness = 3.f; - ret.highlight = true; - } - else - { - ret.accentColor = HighlightColor( color ); - ret.thickness = 1.f; - ret.highlight = false; - } - return ret; -} - - const ZoneEvent* View::FindZoneAtTime( uint64_t thread, int64_t time ) const { // TODO add thread rev-map diff --git a/profiler/src/profiler/TracyView_ZoneTimeline.cpp b/profiler/src/profiler/TracyView_ZoneTimeline.cpp index 7cdac67440..85015e2efe 100644 --- a/profiler/src/profiler/TracyView_ZoneTimeline.cpp +++ 
b/profiler/src/profiler/TracyView_ZoneTimeline.cpp @@ -351,7 +351,7 @@ void View::DrawZoneList( const TimelineContext& ctx, const std::vectorStart(); } + tracy_force_inline int64_t End() const { return event->End(); } + tracy_force_inline bool IsEndValid() const { return event->IsEndValid(); } + tracy_force_inline int16_t SrcLoc() const { return event->SrcLoc(); } + tracy_force_inline int32_t Child() const { return event->Child(); } + tracy_force_inline bool HasChildren() const { return event->HasChildren(); } + tracy_force_inline bool IsGpu() const { return ctx != nullptr; } + tracy_force_inline operator bool() const { return event != nullptr; } tracy_force_inline bool operator==( const ZoneEventC& other ) const { return other.event == event; } tracy_force_inline bool operator==( const ZoneEvent* other ) const { return other == event; } diff --git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index 0269b19b39..e1e406e989 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -2359,7 +2359,7 @@ const uint64_t* Worker::GetInlineSymbolList( uint64_t sym, uint32_t len ) return it; } -int64_t Worker::GetZoneEndImpl( const ZoneEvent& ev, const Vector>>& childArray ) +int64_t Worker::GetZoneEndImpl( const ZoneEvent& ev, const Vector>>& childArray ) const { assert( !ev.IsEndValid() ); auto ptr = &ev; diff --git a/server/TracyWorker.hpp b/server/TracyWorker.hpp index 8686a1dacf..8173c6836b 100644 --- a/server/TracyWorker.hpp +++ b/server/TracyWorker.hpp @@ -579,6 +579,7 @@ class Worker // before its children have ended). // GetZoneEndDirect() will only return zone's direct timing data, without looking at children. tracy_force_inline int64_t GetZoneEnd( const ZoneEvent& ev ) { return ev.IsEndValid() ? ev.End() : GetZoneEndImpl( ev, m_data.zoneChildren ); } + tracy_force_inline int64_t GetZoneEnd( const ZoneEventC ev ) const { return ev.IsEndValid() ? ev.End() : GetZoneEndImpl( *ev.event, ev.IsGpu() ? 
m_data.gpuChildren : m_data.zoneChildren ); } tracy_force_inline int64_t GetZoneEndGPU( const ZoneEvent& ev ) { return ev.IsEndValid() ? ev.End() : GetZoneEndImpl( ev, m_data.gpuChildren ); } static tracy_force_inline int64_t GetZoneEndDirect( const ZoneEvent& ev ) { return ev.IsEndValid() ? ev.End() : ev.Start(); } @@ -606,6 +607,7 @@ class Worker tracy_force_inline const bool HasZoneExtra( const ZoneEvent& ev ) const { return ev.extra != 0; } tracy_force_inline const ZoneExtra& GetZoneExtra( const ZoneEvent& ev ) const { return m_data.zoneExtra[ev.extra]; } tracy_force_inline const EventAdapter GetGpuExtra( const ZoneEvent& ev ) const { return { ev, m_data.gpuExtra[ev.extra] }; } + tracy_force_inline const ZoneExtra& GetZoneExtra( const ZoneEventC ev ) const { return ev.IsGpu() ? m_data.gpuExtra[ev.event->extra] : m_data.zoneExtra[ev.event->extra]; } std::vector GetMatchingSourceLocation( const char* query, bool ignoreCase ) const; @@ -959,7 +961,7 @@ class Worker tracy_force_inline ZoneExtra& AllocZoneExtra( ZoneEvent& ev ); tracy_force_inline ZoneExtra& RequestZoneExtra( ZoneEvent& ev ); - int64_t GetZoneEndImpl( const ZoneEvent& ev, const Vector>>& childArray ); + int64_t GetZoneEndImpl( const ZoneEvent& ev, const Vector>>& childArray ) const; void UpdateMbps( int64_t td ); From 8bfc6ad5b63b593f445e4ee838c5a84957262561 Mon Sep 17 00:00:00 2001 From: Eric Eaton Date: Thu, 9 Oct 2025 13:10:59 -0700 Subject: [PATCH 4/5] Merge ZoneTooltip functions --- profiler/src/profiler/TracyView.hpp | 4 +- profiler/src/profiler/TracyView_FindZone.cpp | 2 +- .../src/profiler/TracyView_GpuTimeline.cpp | 4 +- profiler/src/profiler/TracyView_Memory.cpp | 8 +- profiler/src/profiler/TracyView_Utility.cpp | 5 + profiler/src/profiler/TracyView_ZoneInfo.cpp | 178 ++++++++---------- .../src/profiler/TracyView_ZoneTimeline.cpp | 4 +- server/TracyWorker.cpp | 8 + server/TracyWorker.hpp | 1 + 9 files changed, 104 insertions(+), 110 deletions(-) diff --git 
a/profiler/src/profiler/TracyView.hpp b/profiler/src/profiler/TracyView.hpp index 12a70ebba1..579f828b6b 100644 --- a/profiler/src/profiler/TracyView.hpp +++ b/profiler/src/profiler/TracyView.hpp @@ -344,8 +344,7 @@ class View void ShowZoneInfo( const ZoneEvent& ev ); void ShowZoneInfo( const ZoneEventC ev, uint64_t thread ); - void ZoneTooltip( const ZoneEvent& ev ); - void ZoneTooltipGPU( const ZoneEventC ev ); + void ZoneTooltip( const ZoneEventC ev ); void CallstackTooltip( uint32_t idx ); void CallstackTooltipContents( uint32_t idx ); void CrashTooltip(); @@ -359,6 +358,7 @@ class View const ThreadData* GetZoneThreadData( const ZoneEvent& zone ) const; uint64_t GetZoneThread( const ZoneEvent& zone ) const; uint64_t GetZoneThreadGPU( const EventAdapter& zone ) const; + uint64_t GetZoneThread( const ZoneEventC evC ) const; bool FindMatchingZone( int prev0, int prev1, int flags ); const ZoneEvent* FindZoneAtTime( uint64_t thread, int64_t time ) const; uint64_t GetFrameNumber( const FrameData& fd, int i ) const; diff --git a/profiler/src/profiler/TracyView_FindZone.cpp b/profiler/src/profiler/TracyView_FindZone.cpp index e12b4230b7..ce21fc6cfe 100644 --- a/profiler/src/profiler/TracyView_FindZone.cpp +++ b/profiler/src/profiler/TracyView_FindZone.cpp @@ -230,7 +230,7 @@ void View::DrawZoneList( int id, const Vector>& zones ) { ZoomToZone( *ev ); } - ZoneTooltip( *ev ); + ZoneTooltip( { ev, nullptr } ); m_zoneHover2 = ev; } diff --git a/profiler/src/profiler/TracyView_GpuTimeline.cpp b/profiler/src/profiler/TracyView_GpuTimeline.cpp index 9b25ca8212..2c50f032f1 100644 --- a/profiler/src/profiler/TracyView_GpuTimeline.cpp +++ b/profiler/src/profiler/TracyView_GpuTimeline.cpp @@ -203,7 +203,7 @@ int View::DrawGpuZoneLevel( const V& vec, bool hover, double pxns, int64_t nspx, } else { - ZoneTooltipGPU( { &ev.event, ctx } ); + ZoneTooltip( { &ev.event, ctx } ); if( IsMouseClicked( 2 ) && rend - start > 0 ) { @@ -288,7 +288,7 @@ int View::DrawGpuZoneLevel( const V& vec, 
bool hover, double pxns, int64_t nspx, if( hover && ImGui::IsMouseHoveringRect( wpos + ImVec2( px0, offset ), wpos + ImVec2( px1, offset + tsz.y + 1 ) ) ) { - ZoneTooltipGPU( { &ev.event, ctx } ); + ZoneTooltip( { &ev.event, ctx } ); if( !m_zoomAnim.active && IsMouseClicked( 2 ) ) { diff --git a/profiler/src/profiler/TracyView_Memory.cpp b/profiler/src/profiler/TracyView_Memory.cpp index 7689683e56..3f4401560f 100644 --- a/profiler/src/profiler/TracyView_Memory.cpp +++ b/profiler/src/profiler/TracyView_Memory.cpp @@ -618,7 +618,7 @@ void View::DrawMemoryAllocWindow() { ZoomToZone( *zoneAlloc ); } - ZoneTooltip( *zoneAlloc ); + ZoneTooltip( { zoneAlloc, nullptr } ); } } @@ -643,7 +643,7 @@ void View::DrawMemoryAllocWindow() { ZoomToZone( *zoneFree ); } - ZoneTooltip( *zoneFree ); + ZoneTooltip( { zoneFree, nullptr } ); } if( zoneAlloc == zoneFree ) { @@ -833,7 +833,7 @@ void View::ListMemData( std::vector& vec, const std::function& vec, const std::function& zone ) const } } +uint64_t View::GetZoneThread( const ZoneEventC evC ) const +{ + return evC.IsGpu() ? 
GetZoneThreadGPU( m_worker.GetGpuExtra( *evC.event ) ) : GetZoneThread( *evC.event ); +} + int64_t View::GetZoneChildTime( const ZoneEvent& zone, bool gpu ) { int64_t time = 0; diff --git a/profiler/src/profiler/TracyView_ZoneInfo.cpp b/profiler/src/profiler/TracyView_ZoneInfo.cpp index 345c6ea29e..763d1bcff2 100644 --- a/profiler/src/profiler/TracyView_ZoneInfo.cpp +++ b/profiler/src/profiler/TracyView_ZoneInfo.cpp @@ -1049,7 +1049,7 @@ void View::DrawZoneInfoWindow() { ZoomToZone( *v ); } - ZoneTooltip( *v ); + ZoneTooltip( { v, nullptr } ); } } ); @@ -1281,7 +1281,7 @@ void View::DrawZoneInfoChildren( const V& children, int64_t ztime ) { ZoomToZone( cev ); } - ZoneTooltip( cev ); + ZoneTooltip( { &cev, nullptr } ); } ImGui::PopID(); } @@ -1350,7 +1350,7 @@ void View::DrawZoneInfoChildren( const V& children, int64_t ztime ) { ZoomToZone( cev ); } - ZoneTooltip( cev ); + ZoneTooltip( { &cev, nullptr } ); } ImGui::PopID(); ImGui::Unindent(); @@ -1416,7 +1416,7 @@ void View::DrawZoneInfoChildren( const V& children, int64_t ztime ) { ZoomToZone( cev ); } - ZoneTooltip( cev ); + ZoneTooltip( { &cev, nullptr } ); } ImGui::PopID(); ImGui::NextColumn(); @@ -1624,7 +1624,7 @@ void View::DrawGpuInfoWindow() { ZoomToZoneGPU( { v, ctx } ); } - ZoneTooltipGPU( { v, ctx } ); + ZoneTooltip( { v, ctx } ); } } ); @@ -1738,7 +1738,7 @@ void View::DrawGpuInfoChildren( const V& children, int64_t ztime, const GpuCtxDa { ZoomToZoneGPU( { &cev, ctx } ); } - ZoneTooltipGPU( { &cev, ctx } ); + ZoneTooltip( { &cev, ctx } ); } ImGui::PopID(); } @@ -1801,7 +1801,7 @@ void View::DrawGpuInfoChildren( const V& children, int64_t ztime, const GpuCtxDa { ZoomToZoneGPU( { &cev, ctx } ); } - ZoneTooltipGPU( { &cev, ctx } ); + ZoneTooltip( { &cev, ctx } ); } ImGui::PopID(); ImGui::Unindent(); @@ -1857,7 +1857,7 @@ void View::DrawGpuInfoChildren( const V& children, int64_t ztime, const GpuCtxDa { ZoomToZoneGPU( { &cev, ctx } ); } - ZoneTooltipGPU( { &cev, ctx } ); + ZoneTooltip( { &cev, ctx } ); } 
ImGui::PopID(); ImGui::NextColumn(); @@ -1902,16 +1902,16 @@ void View::ShowZoneInfo( const ZoneEventC ev, uint64_t thread ) } } -void View::ZoneTooltip( const ZoneEvent& ev ) +void View::ZoneTooltip( const ZoneEventC ev ) { const auto tid = GetZoneThread( ev ); auto& srcloc = m_worker.GetSourceLocation( ev.SrcLoc() ); const auto end = m_worker.GetZoneEnd( ev ); const auto ztime = end - ev.Start(); - const auto selftime = GetZoneSelfTime( ev ); + const auto selftime = GetZoneSelfTime( *ev.event, ev.IsGpu() ); ImGui::BeginTooltip(); - if( m_worker.HasZoneExtra( ev ) && m_worker.GetZoneExtra( ev ).name.Active() ) + if( m_worker.HasZoneExtra( *ev.event ) && m_worker.GetZoneExtra( ev ).name.Active() ) { ImGui::TextUnformatted( m_worker.GetString( m_worker.GetZoneExtra( ev ).name ) ); } @@ -1937,80 +1937,29 @@ void View::ZoneTooltip( const ZoneEvent& ev ) ImGui::Separator(); TextFocused( "Execution time:", TimeToString( ztime ) ); #ifndef TRACY_NO_STATISTICS - if( m_worker.AreSourceLocationZonesReady() ) - { - auto& zoneData = m_worker.GetZonesForSourceLocation( ev.SrcLoc() ); - if( zoneData.total > 0 ) + if( ev.IsGpu() ) { + if( m_worker.AreGpuSourceLocationZonesReady() ) { - ImGui::SameLine(); - ImGui::TextDisabled( "(%.2f%% of mean time)", float( ztime ) / zoneData.total * zoneData.zones.size() * 100 ); + auto& zoneData = m_worker.GetGpuZonesForSourceLocation( ev.SrcLoc() ); + if( zoneData.total > 0 ) + { + ImGui::SameLine(); + ImGui::TextDisabled( "(%.2f%% of mean time)", float( ztime ) / zoneData.total * zoneData.zones.size() * 100 ); + } } - } -#endif - TextFocused( "Self time:", TimeToString( selftime ) ); - if( ztime != 0 ) - { - char buf[64]; - PrintStringPercent( buf, 100.f * selftime / ztime ); - ImGui::SameLine(); - TextDisabledUnformatted( buf ); - } - const auto ctx = m_worker.GetContextSwitchData( tid ); - if( ctx ) - { - int64_t time; - uint64_t cnt; - if( GetZoneRunningTime( ctx, ev, time, cnt ) ) + } else { + if( m_worker.AreSourceLocationZonesReady() 
) { - TextFocused( "Running state time:", TimeToString( time ) ); - if( ztime != 0 ) + auto& zoneData = m_worker.GetZonesForSourceLocation( ev.SrcLoc() ); + if( zoneData.total > 0 ) { - char buf[64]; - PrintStringPercent( buf, 100.f * time / ztime ); ImGui::SameLine(); - TextDisabledUnformatted( buf ); + ImGui::TextDisabled( "(%.2f%% of mean time)", float( ztime ) / zoneData.total * zoneData.zones.size() * 100 ); } - TextFocused( "Running state regions:", RealToString( cnt ) ); } } - if( m_worker.HasZoneExtra( ev ) && m_worker.GetZoneExtra( ev ).text.Active() ) - { - ImGui::NewLine(); - TextColoredUnformatted( ImVec4( 0xCC / 255.f, 0xCC / 255.f, 0x22 / 255.f, 1.f ), m_worker.GetString( m_worker.GetZoneExtra( ev ).text ) ); - } - ImGui::EndTooltip(); -} - -void View::ZoneTooltipGPU( const ZoneEventC evtC ) -{ - const auto& ev = m_worker.GetGpuExtra(*evtC.event); - auto ctx = evtC.ctx; - const auto tid = GetZoneThreadGPU( ev ); - const auto& srcloc = m_worker.GetSourceLocation( ev.SrcLoc() ); - const auto end = m_worker.GetZoneEndGPU( ev ); - const auto ztime = end - ev.GpuStart(); - const auto selftime = GetZoneSelfTime( ev, true ); - - ImGui::BeginTooltip(); - ImGui::TextUnformatted( m_worker.GetString( srcloc.name ) ); - ImGui::TextUnformatted( m_worker.GetString( srcloc.function ) ); - ImGui::Separator(); - SmallColorBox( GetSrcLocColor( srcloc, 0 ) ); - ImGui::SameLine(); - ImGui::TextUnformatted( LocationToString( m_worker.GetString( srcloc.file ), srcloc.line ) ); - SmallColorBox( GetThreadColor( tid, 0 ) ); - ImGui::SameLine(); - TextFocused( "Thread:", m_worker.GetThreadName( tid ) ); - ImGui::SameLine(); - ImGui::TextDisabled( "(%s)", RealToString( tid ) ); - if( m_worker.IsThreadFiber( tid ) ) - { - ImGui::SameLine(); - TextColoredUnformatted( ImVec4( 0.2f, 0.6f, 0.2f, 1.f ), "Fiber" ); - } - ImGui::Separator(); - TextFocused( "GPU execution time:", TimeToString( ztime ) ); - TextFocused( "GPU self time:", TimeToString( selftime ) ); +#endif + TextFocused( 
"Self time:", TimeToString( selftime ) ); if( ztime != 0 ) { char buf[64]; @@ -2018,43 +1967,74 @@ void View::ZoneTooltipGPU( const ZoneEventC evtC ) ImGui::SameLine(); TextDisabledUnformatted( buf ); } - TextFocused( "CPU command setup time:", TimeToString( ev.CpuEnd() - ev.CpuStart() ) ); - if( !ctx ) + if( ev.IsGpu() ) { - TextFocused( "Delay to execution:", TimeToString( ev.GpuStart() - ev.CpuStart() ) ); - } - else - { - const auto td = ctx->threadData.size() == 1 ? ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ev.Thread() ) ); - assert( td != ctx->threadData.end() ); - int64_t begin; - if( td->second.timeline.is_magic() ) + auto ctx = ev.ctx; + const auto& ex = m_worker.GetGpuExtra(*ev.event); + TextFocused( "CPU command setup time:", TimeToString( ex.CpuEnd() - ex.CpuStart() ) ); + if( !ctx ) { - begin = ((Vector*)&td->second.timeline)->front().Start(); + TextFocused( "Delay to execution:", TimeToString( ex.GpuStart() - ex.CpuStart() ) ); } else { - begin = td->second.timeline.front()->Start(); + const auto td = ctx->threadData.size() == 1 ? 
ctx->threadData.begin() : ctx->threadData.find( m_worker.DecompressThread( ex.Thread() ) ); + assert( td != ctx->threadData.end() ); + int64_t begin; + if( td->second.timeline.is_magic() ) + { + begin = ( (Vector*)&td->second.timeline )->front().Start(); + } + else + { + begin = td->second.timeline.front()->Start(); + } + const auto drift = GpuDrift( ctx ); + TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ex.GpuStart(), begin, drift ) - ex.CpuStart() ) ); } - const auto drift = GpuDrift( ctx ); - TextFocused( "Delay to execution:", TimeToString( AdjustGpuTime( ev.GpuStart(), begin, drift ) - ev.CpuStart() ) ); - } - if( ctx->notes.contains( ev.query_id ) ) - { - for( auto& p : ctx->notes.at( ev.query_id ) ) + if( ctx->notes.contains( ex.query_id ) ) { - if( ctx->noteNames.count( p.first ) ) + for( auto& p : ctx->notes.at( ex.query_id ) ) { - TextFocused( m_worker.GetString( ctx->noteNames.at( p.first ) ), RealToString( p.second ) ); + if( ctx->noteNames.count( p.first ) ) + { + TextFocused( m_worker.GetString( ctx->noteNames.at( p.first ) ), RealToString( p.second ) ); + } + else + { + TextFocused( RealToString( p.first ), RealToString( p.second ) ); + } } - else + } + } + else + { + const auto ctx = m_worker.GetContextSwitchData( tid ); + if( ctx ) + { + int64_t time; + uint64_t cnt; + if( GetZoneRunningTime( ctx, *ev.event, time, cnt ) ) { - TextFocused( RealToString( p.first ), RealToString( p.second ) ); + TextFocused( "Running state time:", TimeToString( time ) ); + if( ztime != 0 ) + { + char buf[64]; + PrintStringPercent( buf, 100.f * time / ztime ); + ImGui::SameLine(); + TextDisabledUnformatted( buf ); + } + TextFocused( "Running state regions:", RealToString( cnt ) ); } } } + if( m_worker.HasZoneExtra( *ev.event ) && m_worker.GetZoneExtra( ev ).text.Active() ) + { + ImGui::NewLine(); + TextColoredUnformatted( ImVec4( 0xCC / 255.f, 0xCC / 255.f, 0x22 / 255.f, 1.f ), m_worker.GetString( m_worker.GetZoneExtra( ev ).text ) ); + } 
ImGui::EndTooltip(); } diff --git a/profiler/src/profiler/TracyView_ZoneTimeline.cpp b/profiler/src/profiler/TracyView_ZoneTimeline.cpp index 85015e2efe..4dca55ddca 100644 --- a/profiler/src/profiler/TracyView_ZoneTimeline.cpp +++ b/profiler/src/profiler/TracyView_ZoneTimeline.cpp @@ -310,7 +310,7 @@ void View::DrawZoneList( const TimelineContext& ctx, const std::vector 0 ) { @@ -384,7 +384,7 @@ void View::DrawZoneList( const TimelineContext& ctx, const std::vectorsecond : empty; } +const Worker::SourceLocationZones& Worker::GetGpuZonesForSourceLocation( int16_t srcloc ) const +{ + assert( AreGpuSourceLocationZonesReady() ); + static const SourceLocationZones empty; + auto it = m_data.gpuSourceLocationZones.find( srcloc ); + return it != m_data.gpuSourceLocationZones.end() ? it->second : empty; +} + const SymbolStats* Worker::GetSymbolStats( uint64_t symAddr ) const { assert( AreCallstackSamplesReady() ); diff --git a/server/TracyWorker.hpp b/server/TracyWorker.hpp index 8173c6836b..4b42e5a78c 100644 --- a/server/TracyWorker.hpp +++ b/server/TracyWorker.hpp @@ -616,6 +616,7 @@ class Worker #ifndef TRACY_NO_STATISTICS SourceLocationZones& GetZonesForSourceLocation( int16_t srcloc ); const SourceLocationZones& GetZonesForSourceLocation( int16_t srcloc ) const; + const SourceLocationZones& GetGpuZonesForSourceLocation( int16_t srcloc ) const; const unordered_flat_map& GetSourceLocationZones() const { return m_data.sourceLocationZones; } const unordered_flat_map& GetGpuSourceLocationZones() const { return m_data.gpuSourceLocationZones; } bool AreSourceLocationZonesReady() const { return m_data.sourceLocationZonesReady; } From f3de403107d365f056e6587449c539f851d50e02 Mon Sep 17 00:00:00 2001 From: Eric Eaton Date: Thu, 9 Oct 2025 18:26:18 -0700 Subject: [PATCH 5/5] Merge ReconstructZoneStatistics functions --- server/TracyWorker.cpp | 46 +++++++++++++----------------------------- server/TracyWorker.hpp | 3 +-- 2 files changed, 15 insertions(+), 34 deletions(-) diff 
--git a/server/TracyWorker.cpp b/server/TracyWorker.cpp index b93350b8ba..58bd827475 100644 --- a/server/TracyWorker.cpp +++ b/server/TracyWorker.cpp @@ -1727,7 +1727,7 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow auto& vec = *(Vector*)( &_vec ); for( auto& zone : vec ) { - if( zone.IsEndValid() ) ReconstructZoneStatistics( countMap, zone, thread ); + if( zone.IsEndValid() ) ReconstructZoneStatistics( countMap, zone, thread, false ); if( zone.HasChildren() ) { countMap[uint16_t(zone.SrcLoc())]++; @@ -1752,18 +1752,20 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow m_data.sourceLocationZonesReady = true; } ) ); - std::function>&, uint16_t)> ProcessTimelineGpu; - ProcessTimelineGpu = [this, &ProcessTimelineGpu] ( Vector>& _vec, uint16_t thread ) + std::function>&, uint16_t)> ProcessTimelineGpu; + ProcessTimelineGpu = [this, &ProcessTimelineGpu] ( uint8_t* countMap, Vector>& _vec, uint16_t thread ) { if( m_shutdown.load( std::memory_order_relaxed ) ) return; assert( _vec.is_magic() ); auto& vec = *(Vector*)( &_vec ); for( auto& zone : vec ) { - if( zone.End() >= 0 ) ReconstructZoneStatistics( zone, thread ); + if( zone.IsEndValid() ) ReconstructZoneStatistics( countMap, zone, thread, true ); if( zone.Child() >= 0 ) { - ProcessTimelineGpu( GetGpuChildrenMutable( zone.Child() ), thread ); + countMap[uint16_t(zone.SrcLoc())]++; + ProcessTimelineGpu( countMap, GetGpuChildrenMutable( zone.Child() ), thread ); + countMap[uint16_t(zone.SrcLoc())]--; } } }; @@ -1776,7 +1778,8 @@ Worker::Worker( FileRead& f, EventType::Type eventMask, bool bgTasks, bool allow if( m_shutdown.load( std::memory_order_relaxed ) ) return; if( !td.second.timeline.empty() ) { - ProcessTimelineGpu( td.second.timeline, td.first ); + uint8_t countMap[64*1024] = {}; /* zero-initialized: entries are ++/-- counters indexed by source location */ + ProcessTimelineGpu( countMap, td.second.timeline, td.first ); } } } @@ -7686,14 +7689,15 @@ void Worker::ReadTimelineHaveSize( FileRead& f, ZoneEvent* zone, int64_t& 
refTim } #ifndef TRACY_NO_STATISTICS -void Worker::ReconstructZoneStatistics( uint8_t* countMap, ZoneEvent& zone, uint16_t thread ) +void Worker::ReconstructZoneStatistics( uint8_t* countMap, ZoneEvent& zone, uint16_t thread, bool is_gpu ) { assert( zone.IsEndValid() ); auto timeSpan = zone.End() - zone.Start(); if( timeSpan > 0 ) { - auto it = m_data.sourceLocationZones.find( zone.SrcLoc() ); - assert( it != m_data.sourceLocationZones.end() ); + auto& slzMap = is_gpu ? m_data.gpuSourceLocationZones : m_data.sourceLocationZones; + auto it = slzMap.find( zone.SrcLoc() ); + if( it == slzMap.end() ) { assert( is_gpu ); it = slzMap.emplace( zone.SrcLoc(), SourceLocationZones {} ).first; } /* keep the lazy insert the removed GPU overload performed */ ZoneThreadData ztd; ztd.SetZone( &zone ); @@ -7716,7 +7720,7 @@ void Worker::ReconstructZoneStatistics( uint8_t* countMap, ZoneEvent& zone, uint if( zone.HasChildren() ) { - auto& children = GetZoneChildren( zone.Child() ); + auto& children = is_gpu ? GetGpuChildren( zone.Child() ) : GetZoneChildren( zone.Child() ); assert( children.is_magic() ); auto& c = *(Vector*)( &children ); for( auto& v : c ) @@ -7742,28 +7746,6 @@ void Worker::ReconstructZoneStatistics( uint8_t* countMap, ZoneEvent& zone, uint } } -void Worker::ReconstructZoneStatistics( ZoneEvent& zone, uint16_t thread ) -{ - assert( zone.End() >= 0 ); - auto timeSpan = zone.End() - zone.Start(); - if( timeSpan > 0 ) - { - auto it = m_data.gpuSourceLocationZones.find( zone.SrcLoc() ); - if( it == m_data.gpuSourceLocationZones.end() ) - { - it = m_data.gpuSourceLocationZones.emplace( zone.SrcLoc(), SourceLocationZones {} ).first; - } - ZoneThreadData ztd; - ztd.SetZone( &zone ); - ztd.SetThread( thread ); - auto& slz = it->second; - slz.zones.push_back( ztd ); - if( slz.min > timeSpan ) slz.min = timeSpan; - if( slz.max < timeSpan ) slz.max = timeSpan; - slz.total += timeSpan; - slz.sumSq += double( timeSpan ) * timeSpan; - } -} #else void Worker::CountZoneStatistics( ZoneEvent* zone ) { diff --git a/server/TracyWorker.hpp b/server/TracyWorker.hpp index 4b42e5a78c..dbb4ce28e0 100644 --- 
a/server/TracyWorker.hpp +++ b/server/TracyWorker.hpp @@ -951,8 +951,7 @@ class Worker tracy_force_inline void ReadTimelineHaveSize( FileRead& f, ZoneEvent* zone, int64_t& refTime, int64_t& refGpuTime, int32_t& childIdx, uint64_t sz, bool hasQueryId ); #ifndef TRACY_NO_STATISTICS - tracy_force_inline void ReconstructZoneStatistics( uint8_t* countMap, ZoneEvent& zone, uint16_t thread ); - tracy_force_inline void ReconstructZoneStatistics( ZoneEvent& zone, uint16_t thread ); + tracy_force_inline void ReconstructZoneStatistics( uint8_t* countMap, ZoneEvent& zone, uint16_t thread, bool is_gpu ); #else tracy_force_inline void CountZoneStatistics( ZoneEvent* zone ); #endif