From d7012d1554efd61e163c4909be5c7f2b99835a32 Mon Sep 17 00:00:00 2001 From: Shu Date: Sat, 7 Nov 2020 22:31:17 +0100 Subject: [PATCH] Update tracy to 0.7.3 --- deps/tracy/AUTHORS | 3 + deps/tracy/Tracy.hpp | 110 ++++-- deps/tracy/TracyC.h | 42 +- deps/tracy/TracyClient.cpp | 5 + deps/tracy/TracyOpenGL.hpp | 18 +- deps/tracy/TracyVulkan.hpp | 162 ++++++-- deps/tracy/client/TracyArmCpuTable.hpp | 29 +- deps/tracy/client/TracyCallstack.cpp | 53 +-- deps/tracy/client/TracyCallstack.hpp | 6 +- deps/tracy/client/TracyDxt1.cpp | 12 + deps/tracy/client/TracyLock.hpp | 57 ++- deps/tracy/client/TracyProfiler.cpp | 379 +++++++++++------- deps/tracy/client/TracyProfiler.hpp | 277 ++++++++++---- deps/tracy/client/TracyRingBuffer.hpp | 116 ++++++ deps/tracy/client/TracyScoped.hpp | 58 ++- deps/tracy/client/TracySysTrace.cpp | 445 +++++++++++++++++++--- deps/tracy/client/TracyThread.hpp | 15 + deps/tracy/client/tracy_concurrentqueue.h | 18 - deps/tracy/client/tracy_rpmalloc.cpp | 2 +- deps/tracy/common/TracyProtocol.hpp | 11 +- deps/tracy/common/TracyQueue.hpp | 155 +++++--- deps/tracy/common/TracySocket.cpp | 186 ++++++--- deps/tracy/common/TracySocket.hpp | 14 +- deps/tracy/common/TracySystem.cpp | 10 + src/client/contouring/FlatDualMC.cpp | 7 +- src/client/render/vk/CommandCenter.hpp | 4 +- src/client/world/Chunk.hpp | 2 +- src/client/world/DistantUniverse.cpp | 51 +-- src/core/net/io.hpp | 4 +- src/core/world/Chunk.hpp | 6 +- src/core/world/EdittableChunk.cpp | 9 +- src/core/world/EdittableChunk.hpp | 6 +- src/core/world/iterators.hpp | 28 ++ src/server/net/Server.hpp | 8 + src/server/world/SharedParts.hpp | 3 +- src/server/world/Universe.cpp | 85 ++--- 36 files changed, 1728 insertions(+), 668 deletions(-) create mode 100644 deps/tracy/client/TracyRingBuffer.hpp diff --git a/deps/tracy/AUTHORS b/deps/tracy/AUTHORS index 826e3e7..52c7654 100644 --- a/deps/tracy/AUTHORS +++ b/deps/tracy/AUTHORS @@ -9,3 +9,6 @@ Dedmen Miller (find zone bug fixes, improv Michał Cichoń (OSX call 
stack decoding backport) Thales Sabino (OpenCL support) Andrew Depke (Direct3D 12 support) +Simonas Kazlauskas (OSX CI, external bindings) +Jakub Žádník (csvexport utility) +Andrey Voroshilov (multi-DLL fixes) diff --git a/deps/tracy/Tracy.hpp b/deps/tracy/Tracy.hpp index 6425356..34e8736 100644 --- a/deps/tracy/Tracy.hpp +++ b/deps/tracy/Tracy.hpp @@ -11,6 +11,9 @@ #define ZoneNamedC(x,y,z) #define ZoneNamedNC(x,y,z,w) +#define ZoneTransient(x,y) +#define ZoneTransientN(x,y,z) + #define ZoneScoped #define ZoneScopedN(x) #define ZoneScopedC(x) @@ -50,12 +53,22 @@ #define TracyAlloc(x,y) #define TracyFree(x) +#define TracySecureAlloc(x,y) +#define TracySecureFree(x) + +#define TracyAllocN(x,y,z) +#define TracyFreeN(x,y) +#define TracySecureAllocN(x,y,z) +#define TracySecureFreeN(x,y) #define ZoneNamedS(x,y,z) #define ZoneNamedNS(x,y,z,w) #define ZoneNamedCS(x,y,z,w) #define ZoneNamedNCS(x,y,z,w,a) +#define ZoneTransientS(x,y,z) +#define ZoneTransientNS(x,y,z,w) + #define ZoneScopedS(x) #define ZoneScopedNS(x,y) #define ZoneScopedCS(x,y) @@ -63,6 +76,13 @@ #define TracyAllocS(x,y,z) #define TracyFreeS(x,y) +#define TracySecureAllocS(x,y,z) +#define TracySecureFreeS(x,y) + +#define TracyAllocNS(x,y,z,w) +#define TracyFreeNS(x,y,z) +#define TracySecureAllocNS(x,y,z,w) +#define TracySecureFreeNS(x,y,z) #define TracyMessageS(x,y,z) #define TracyMessageLS(x,y) @@ -71,23 +91,32 @@ #define TracyParameterRegister(x) #define TracyParameterSetup(x,y,z,w) +#define TracyIsConnected false #else +#include <string.h> + #include "client/TracyLock.hpp" #include "client/TracyProfiler.hpp" #include "client/TracyScoped.hpp" #if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define ZoneNamed( varname, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active ); -# define ZoneNamedN( varname, 
name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active ); -# define ZoneNamedC( varname, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active ); -# define ZoneNamedNC( varname, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active ); +# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active ); +# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active ); +# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active ); +# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( 
&TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active ); + +# define ZoneTransient( varname, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, TRACY_CALLSTACK, active ); +# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), TRACY_CALLSTACK, active ); #else -# define ZoneNamed( varname, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active ); -# define ZoneNamedN( varname, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active ); -# define ZoneNamedC( varname, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active ); -# define ZoneNamedNC( varname, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active ); +# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active ); +# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, 
__FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active ); +# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active ); +# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active ); + +# define ZoneTransient( varname, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, active ); +# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), active ); #endif #define ZoneScoped ZoneNamed( ___tracy_scoped_zone, true ) @@ -109,13 +138,13 @@ #define FrameImage( image, width, height, offset, flip ) tracy::Profiler::SendFrameImage( image, width, height, offset, flip ); -#define TracyLockable( type, varname ) tracy::Lockable varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() }; -#define TracyLockableN( type, varname, desc ) tracy::Lockable varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() }; -#define TracySharedLockable( type, varname ) tracy::SharedLockable varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() }; 
-#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() }; +#define TracyLockable( type, varname ) tracy::Lockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() }; +#define TracyLockableN( type, varname, desc ) tracy::Lockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() }; +#define TracySharedLockable( type, varname ) tracy::SharedLockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() }; +#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() }; #define LockableBase( type ) tracy::Lockable #define SharedLockableBase( type ) tracy::SharedLockable -#define LockMark( varname ) static const tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; varname.Mark( &__tracy_lock_location_##varname ); +#define LockMark( varname ) static constexpr tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; varname.Mark( &__tracy_lock_location_##varname ); #define LockableName( varname, txt, size ) varname.CustomName( txt, size ); #define TracyPlot( name, val ) tracy::Profiler::PlotData( name, val ); @@ -129,31 +158,55 @@ # define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, 
TRACY_CALLSTACK ); # define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK ); -# define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK ); -# define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK ); +# define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, false ); +# define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, false ); +# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, true ); +# define TracySecureFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, true ); + +# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, false, name ); +# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, false, name ); +# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, true, name ); +# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, true, name ); #else # define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, 0 ); # define TracyMessageL( txt ) tracy::Profiler::Message( txt, 0 ); # define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, 0 ); # define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, 0 ); -# define TracyAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size ); -# define TracyFree( ptr ) tracy::Profiler::MemFree( ptr ); +# define TracyAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, false ); +# define TracyFree( ptr ) tracy::Profiler::MemFree( ptr, false ); +# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, true ); +# define TracySecureFree( ptr ) tracy::Profiler::MemFree( ptr, true ); + +# define TracyAllocN( 
ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, false, name ); +# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, false, name ); +# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, true, name ); +# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, true, name ); #endif #ifdef TRACY_HAS_CALLSTACK -# define ZoneNamedS( varname, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active ); -# define ZoneNamedNS( varname, name, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active ); -# define ZoneNamedCS( varname, color, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active ); -# define ZoneNamedNCS( varname, name, color, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active ); +# define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active ); +# define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, 
__FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active ); +# define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active ); +# define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active ); + +# define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, depth, active ); +# define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), depth, active ); # define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true ) # define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true ) # define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true ) # define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color, depth, true ) -# define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth ); -# define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth ); +# define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, false ); +# define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, false ); +# define TracySecureAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, true ); +# 
define TracySecureFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, true ); + +# define TracyAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, false, name ); +# define TracyFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, false, name ); +# define TracySecureAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, true, name ); +# define TracySecureFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, true, name ); # define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth ); # define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth ); @@ -165,6 +218,9 @@ # define ZoneNamedCS( varname, color, depth, active ) ZoneNamedC( varname, color, active ) # define ZoneNamedNCS( varname, name, color, depth, active ) ZoneNamedNC( varname, name, color, active ) +# define ZoneTransientS( varname, depth, active ) ZoneTransient( varname, active ) +# define ZoneTransientNS( varname, name, depth, active ) ZoneTransientN( varname, name, active ) + # define ZoneScopedS( depth ) ZoneScoped # define ZoneScopedNS( name, depth ) ZoneScopedN( name ) # define ZoneScopedCS( color, depth ) ZoneScopedC( color ) @@ -172,6 +228,13 @@ # define TracyAllocS( ptr, size, depth ) TracyAlloc( ptr, size ) # define TracyFreeS( ptr, depth ) TracyFree( ptr ) +# define TracySecureAllocS( ptr, size, depth ) TracySecureAlloc( ptr, size ) +# define TracySecureFreeS( ptr, depth ) TracySecureFree( ptr ) + +# define TracyAllocNS( ptr, size, depth, name ) TracyAllocN( ptr, size, name ) +# define TracyFreeNS( ptr, depth, name ) TracyFreeN( ptr, name ) +# define TracySecureAllocNS( ptr, size, depth, name ) TracySecureAllocN( ptr, size, name ) +# define TracySecureFreeNS( ptr, depth, name ) TracySecureFreeN( ptr, name ) # define TracyMessageS( txt, size, depth ) TracyMessage( txt, size ) # define TracyMessageLS( txt, depth ) 
TracyMessageL( txt ) @@ -181,6 +244,7 @@ #define TracyParameterRegister( cb ) tracy::Profiler::ParameterRegister( cb ); #define TracyParameterSetup( idx, name, isBool, val ) tracy::Profiler::ParameterSetup( idx, name, isBool, val ); +#define TracyIsConnected tracy::GetProfiler().IsConnected() #endif diff --git a/deps/tracy/TracyC.h b/deps/tracy/TracyC.h index 9df1b65..61011e7 100644 --- a/deps/tracy/TracyC.h +++ b/deps/tracy/TracyC.h @@ -11,6 +11,11 @@ extern "C" { #endif +TRACY_API void ___tracy_set_thread_name( const char* name ); + +#define TracyCSetThreadName( name ) ___tracy_set_thread_name( name ); + + #ifndef TRACY_ENABLE typedef const void* TracyCZoneCtx; @@ -26,6 +31,8 @@ typedef const void* TracyCZoneCtx; #define TracyCAlloc(x,y) #define TracyCFree(x) +#define TracyCSecureAlloc(x,y) +#define TracyCSecureFree(x) #define TracyCFrameMark #define TracyCFrameMarkNamed(x) @@ -47,6 +54,8 @@ typedef const void* TracyCZoneCtx; #define TracyCAllocS(x,y,z) #define TracyCFreeS(x,y) +#define TracyCSecureAllocS(x,y,z) +#define TracyCSecureFreeS(x,y) #define TracyCMessageS(x,y,z) #define TracyCMessageLS(x,y) @@ -81,8 +90,9 @@ struct ___tracy_c_zone_context // This struct, as visible to user, is immutable, so treat it as if const was declared here. 
typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx; -TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, const char* function ); -TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz ); +TRACY_API void ___tracy_init_thread(void); +TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz ); +TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz ); TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active ); TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active ); @@ -112,10 +122,10 @@ TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value ); #define TracyCZoneValue( ctx, value ) ___tracy_emit_zone_value( ctx, value ); -TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size ); -TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth ); -TRACY_API void ___tracy_emit_memory_free( const void* ptr ); -TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth ); +TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure ); +TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure ); +TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure ); +TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure ); TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack ); TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack ); @@ -123,16 +133,20 @@ TRACY_API void ___tracy_emit_messageC( 
const char* txt, size_t size, uint32_t co TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack ); #if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK ) -# define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK ) +# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 0 ) +# define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 0 ) +# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 1 ) +# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 1 ) # define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK ); # define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK ); # define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK ); # define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK ); #else -# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size ); -# define TracyCFree( ptr ) ___tracy_emit_memory_free( ptr ); +# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 0 ); +# define TracyCFree( ptr ) ___tracy_emit_memory_free( ptr, 0 ); +# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 1 ); +# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free( ptr, 1 ); # define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, 0 ); # define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, 0 ); @@ -166,8 +180,10 @@ TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ); # define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, 
__FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active ); # define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active ); -# define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth ) -# define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth ) +# define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 0 ) +# define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 0 ) +# define TracyCSecureAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 1 ) +# define TracyCSecureFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 1 ) # define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth ); # define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth ); @@ -181,6 +197,8 @@ TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ); # define TracyCAllocS( ptr, size, depth ) TracyCAlloc( ptr, size ) # define TracyCFreeS( ptr, depth ) TracyCFree( ptr ) +# define TracyCSecureAllocS( ptr, size, depth ) TracyCSecureAlloc( ptr, size ) +# define TracyCSecureFreeS( ptr, depth ) TracyCSecureFree( ptr ) # define TracyCMessageS( txt, size, depth ) TracyCMessage( txt, size ) # define TracyCMessageLS( txt, depth ) TracyCMessageL( txt ) diff --git a/deps/tracy/TracyClient.cpp b/deps/tracy/TracyClient.cpp index 3b42711..a400cc7 100644 --- a/deps/tracy/TracyClient.cpp +++ b/deps/tracy/TracyClient.cpp @@ -15,6 +15,10 @@ #ifdef TRACY_ENABLE +#ifdef _MSC_VER +# pragma warning(push, 
0) +#endif + #include "common/tracy_lz4.cpp" #include "client/TracyProfiler.cpp" #include "client/TracyCallstack.cpp" @@ -42,6 +46,7 @@ #ifdef _MSC_VER # pragma comment(lib, "ws2_32.lib") # pragma comment(lib, "dbghelp.lib") +# pragma warning(pop) #endif #endif diff --git a/deps/tracy/TracyOpenGL.hpp b/deps/tracy/TracyOpenGL.hpp index e4d831d..cfa057a 100644 --- a/deps/tracy/TracyOpenGL.hpp +++ b/deps/tracy/TracyOpenGL.hpp @@ -52,21 +52,21 @@ public: #define TracyGpuContext tracy::InitRPMallocThread(); tracy::GetGpuCtx().ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); new(tracy::GetGpuCtx().ptr) tracy::GpuCtx; #if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define TracyGpuNamedZone( varname, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active ); -# define TracyGpuNamedZoneC( varname, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active ); +# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active ); +# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active ); # define TracyGpuZone( name ) 
TracyGpuNamedZoneS( ___tracy_gpu_zone, name, TRACY_CALLSTACK, true ) # define TracyGpuZoneC( name, color ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true ) #else -# define TracyGpuNamedZone( varname, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active ); -# define TracyGpuNamedZoneC( varname, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active ); +# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active ); +# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active ); # define TracyGpuZone( name ) TracyGpuNamedZone( ___tracy_gpu_zone, name, true ) # define TracyGpuZoneC( name, color ) TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color, true ) #endif #define TracyGpuCollect tracy::GetGpuCtx().ptr->Collect(); #ifdef TRACY_HAS_CALLSTACK -# define TracyGpuNamedZoneS( varname, name, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active ); -# define TracyGpuNamedZoneCS( varname, name, 
color, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active ); +# define TracyGpuNamedZoneS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active ); +# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active ); # define TracyGpuZoneS( name, depth ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth, true ) # define TracyGpuZoneCS( name, color, depth ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth, true ) #else @@ -110,7 +110,7 @@ public: MemWrite( &item->gpuNewContext.thread, thread ); MemWrite( &item->gpuNewContext.period, period ); MemWrite( &item->gpuNewContext.context, m_context ); - MemWrite( &item->gpuNewContext.accuracyBits, (uint8_t)bits ); + MemWrite( &item->gpuNewContext.flags, uint8_t( 0 ) ); MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl ); #ifdef TRACY_ON_DEMAND @@ -215,6 +215,8 @@ public: const auto queryId = GetGpuCtx().ptr->NextQueryId(); glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP ); + GetProfiler().SendCallstack( depth ); + const auto thread = GetThreadHandle(); TracyLfqPrepare( QueueType::GpuZoneBeginCallstack ); MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); @@ -223,8 +225,6 @@ public: MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); MemWrite( &item->gpuZoneBegin.context, 
GetGpuCtx().ptr->GetId() ); TracyLfqCommit; - - GetProfiler().SendCallstack( depth ); } tracy_force_inline ~GpuCtxScope() diff --git a/deps/tracy/TracyVulkan.hpp b/deps/tracy/TracyVulkan.hpp index 0cf9d63..48942dd 100644 --- a/deps/tracy/TracyVulkan.hpp +++ b/deps/tracy/TracyVulkan.hpp @@ -4,6 +4,7 @@ #if !defined TRACY_ENABLE #define TracyVkContext(x,y,z,w) nullptr +#define TracyVkContextCalibrated(x,y,z,w,a,b) nullptr #define TracyVkDestroy(x) #define TracyVkNamedZone(c,x,y,z,w) #define TracyVkNamedZoneC(c,x,y,z,w,a) @@ -19,10 +20,9 @@ namespace tracy { class VkCtxScope {}; -class VkCtx; } -using TracyVkCtx = tracy::VkCtx*; +using TracyVkCtx = void*; #else @@ -43,16 +43,36 @@ class VkCtx enum { QueryCount = 64 * 1024 }; public: - VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf ) + VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT _vkGetCalibratedTimestampsEXT ) : m_device( device ) + , m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT ) , m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) ) , m_head( 0 ) , m_tail( 0 ) , m_oldCnt( 0 ) , m_queryCount( QueryCount ) + , m_vkGetCalibratedTimestampsEXT( _vkGetCalibratedTimestampsEXT ) { assert( m_context != 255 ); + if( _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT && _vkGetCalibratedTimestampsEXT ) + { + uint32_t num; + _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, nullptr ); + if( num > 4 ) num = 4; + VkTimeDomainEXT data[4]; + _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, data ); + for( uint32_t i=0; i deviation[i] ) + { + minDeviation = deviation[i]; + } + } + m_deviation = minDeviation * 3 / 2; + + m_qpcToNs = int64_t( 1000000000. 
/ GetFrequencyQpc() ); + + Calibrate( device, m_prevCalibration, tgpu ); + tcpu = Profiler::GetTime(); + } + + uint8_t flags = 0; + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration; auto item = Profiler::QueueSerial(); MemWrite( &item->hdr.type, QueueType::GpuNewContext ); @@ -105,7 +160,7 @@ public: memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) ); MemWrite( &item->gpuNewContext.period, period ); MemWrite( &item->gpuNewContext.context, m_context ); - MemWrite( &item->gpuNewContext.accuracyBits, uint8_t( 0 ) ); + MemWrite( &item->gpuNewContext.flags, flags ); MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan ); #ifdef TRACY_ON_DEMAND @@ -133,6 +188,8 @@ public: { vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ); m_head = m_tail = 0; + int64_t tgpu; + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu ); return; } #endif @@ -164,6 +221,25 @@ public: Profiler::QueueSerialFinish(); } + if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) + { + int64_t tgpu, tcpu; + Calibrate( m_device, tcpu, tgpu ); + const auto refCpu = Profiler::GetTime(); + const auto delta = tcpu - m_prevCalibration; + if( delta > 0 ) + { + m_prevCalibration = tcpu; + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::GpuCalibration ); + MemWrite( &item->gpuCalibration.gpuTime, tgpu ); + MemWrite( &item->gpuCalibration.cpuTime, refCpu ); + MemWrite( &item->gpuCalibration.cpuDelta, delta ); + MemWrite( &item->gpuCalibration.context, m_context ); + Profiler::QueueSerialFinish(); + } + } + vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt ); m_tail += cnt; @@ -184,8 +260,35 @@ private: return m_context; } + tracy_force_inline void Calibrate( VkDevice device, int64_t& tCpu, int64_t& tGpu ) + { + assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ); + VkCalibratedTimestampInfoEXT spec[2] = { + { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, 
VK_TIME_DOMAIN_DEVICE_EXT }, + { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain }, + }; + uint64_t ts[2]; + uint64_t deviation; + do + { + m_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, &deviation ); + } + while( deviation > m_deviation ); + +#if defined _WIN32 || defined __CYGWIN__ + tGpu = ts[0]; + tCpu = ts[1] * m_qpcToNs; +#else + assert( false ); +#endif + } + VkDevice m_device; VkQueryPool m_query; + VkTimeDomainEXT m_timeDomain; + uint64_t m_deviation; + int64_t m_qpcToNs; + int64_t m_prevCalibration; uint8_t m_context; unsigned int m_head; @@ -194,6 +297,8 @@ private: unsigned int m_queryCount; int64_t* m_res; + + PFN_vkGetCalibratedTimestampsEXT m_vkGetCalibratedTimestampsEXT; }; class VkCtxScope @@ -237,6 +342,8 @@ public: const auto queryId = ctx->NextQueryId(); vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ); + GetProfiler().SendCallstack( depth ); + auto item = Profiler::QueueSerial(); MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial ); MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); @@ -245,8 +352,6 @@ public: MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); MemWrite( &item->gpuZoneBegin.context, ctx->GetId() ); Profiler::QueueSerialFinish(); - - GetProfiler().SendCallstack( depth ); } tracy_force_inline ~VkCtxScope() @@ -272,11 +377,11 @@ private: VkCtx* m_ctx; }; -static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf ) +static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct ) { InitRPMallocThread(); auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) ); - new(ctx) VkCtx( physdev, device, queue, cmdbuf ); + new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct ); return ctx; } @@ -290,24 +395,25 @@ static 
inline void DestroyVkContext( VkCtx* ctx ) using TracyVkCtx = tracy::VkCtx*; -#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf ); +#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, nullptr, nullptr ); +#define TracyVkContextCalibrated( physdev, device, queue, cmdbuf, gpdctd, gct ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, gpdctd, gct ); #define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx ); #if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK -# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active ); -# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active ); +# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active ); +# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active ); # define TracyVkZone( ctx, cmdbuf, name ) 
TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, TRACY_CALLSTACK, true ) # define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, TRACY_CALLSTACK, true ) #else -# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, active ); -# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, active ); +# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, active ); +# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, active ); # define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZone( ctx, ___tracy_gpu_zone, cmdbuf, name, true ) # define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneC( ctx, ___tracy_gpu_zone, cmdbuf, name, color, true ) #endif #define TracyVkCollect( ctx, cmdbuf ) ctx->Collect( cmdbuf ); #ifdef TRACY_HAS_CALLSTACK -# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { 
name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth, active ); -# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth, active ); +# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth, active ); +# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth, active ); # define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, depth, true ) # define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, depth, true ) #else diff --git a/deps/tracy/client/TracyArmCpuTable.hpp b/deps/tracy/client/TracyArmCpuTable.hpp index 9e9a73d..ff7d976 100644 --- a/deps/tracy/client/TracyArmCpuTable.hpp +++ b/deps/tracy/client/TracyArmCpuTable.hpp @@ -1,10 +1,8 @@ -#ifdef _MSC_VER -# pragma warning(disable:4996) -#endif - namespace tracy { +#if defined __linux__ && defined __ARM_ARCH + static const char* DecodeArmImplementer( uint32_t v ) { static char buf[16]; @@ -16,6 +14,7 @@ static const char* DecodeArmImplementer( uint32_t v ) case 0x44: return "DEC"; case 0x46: return 
"Fujitsu"; case 0x48: return "HiSilicon"; + case 0x49: return "Infineon"; case 0x4d: return "Motorola"; case 0x4e: return "Nvidia"; case 0x50: return "Applied Micro"; @@ -27,6 +26,7 @@ static const char* DecodeArmImplementer( uint32_t v ) case 0x66: return "Faraday"; case 0x68: return "HXT"; case 0x69: return "Intel"; + case 0xc0: return "Ampere Computing"; default: break; } sprintf( buf, "0x%x", v ); @@ -75,6 +75,7 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part ) case 0xc60: return " Cortex-M0+"; case 0xd00: return " AArch64 simulator"; case 0xd01: return " Cortex-A32"; + case 0xd02: return " Cortex-A34"; case 0xd03: return " Cortex-A53"; case 0xd04: return " Cortex-A35"; case 0xd05: return " Cortex-A55"; @@ -91,6 +92,10 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part ) case 0xd13: return " Cortex-R52"; case 0xd20: return " Cortex-M23"; case 0xd21: return " Cortex-M33"; + case 0xd40: return " Zeus"; + case 0xd41: return " Cortex-A78"; + case 0xd43: return " Cortex-A65AE"; + case 0xd44: return " Cortex-X1"; case 0xd4a: return " Neoverse E1"; default: break; } @@ -110,6 +115,13 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part ) case 0xa2: return " ThunderX 81XX"; case 0xa3: return " ThunderX 83XX"; case 0xaf: return " ThunderX2 99xx"; + case 0xb0: return " OcteonTX2"; + case 0xb1: return " OcteonTX2 T98"; + case 0xb2: return " OcteonTX2 T96"; + case 0xb3: return " OcteonTX2 F95"; + case 0xb4: return " OcteonTX2 F95N"; + case 0xb5: return " OcteonTX2 F95MM"; + case 0xb8: return " ThunderX3 T110"; default: break; } case 0x44: @@ -212,6 +224,8 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part ) return buf; } +#elif defined __APPLE__ && TARGET_OS_IPHONE == 1 + static const char* DecodeIosDevice( const char* id ) { static const char* DeviceTable[] = { @@ -252,6 +266,7 @@ static const char* DecodeIosDevice( const char* id ) "iPhone12,1", "iPhone 11", "iPhone12,3", "iPhone 11 Pro", "iPhone12,5", "iPhone 
11 Pro Max", + "iPhone12,8", "iPhone SE 2nd Gen", "iPad1,1", "iPad (A1219/A1337)", "iPad2,1", "iPad 2 (A1395)", "iPad2,2", "iPad 2 (A1396)", @@ -302,6 +317,10 @@ static const char* DecodeIosDevice( const char* id ) "iPad8,6", "iPad Pro 12.9\" 3rd gen (A1876)", "iPad8,7", "iPad Pro 12.9\" 3rd gen (A1895/A1983/A2014)", "iPad8,8", "iPad Pro 12.9\" 3rd gen (A1895/A1983/A2014)", + "iPad8,9", "iPad Pro 11\" 2nd gen (Wifi)", + "iPad8,10", "iPad Pro 11\" 2nd gen (Wifi+Cellular)", + "iPad8,11", "iPad Pro 12.9\" 4th gen (Wifi)", + "iPad8,12", "iPad Pro 12.9\" 4th gen (Wifi+Cellular)", "iPad11,1", "iPad Mini 5th gen (A2133)", "iPad11,2", "iPad Mini 5th gen (A2124/A2125/A2126)", "iPad11,3", "iPad Air 3rd gen (A2152)", @@ -325,4 +344,6 @@ static const char* DecodeIosDevice( const char* id ) return id; } +#endif + } diff --git a/deps/tracy/client/TracyCallstack.cpp b/deps/tracy/client/TracyCallstack.cpp index f06fbae..a8c9873 100644 --- a/deps/tracy/client/TracyCallstack.cpp +++ b/deps/tracy/client/TracyCallstack.cpp @@ -222,9 +222,9 @@ static const char* GetModuleName( uint64_t addr ) return "[unknown]"; } -SymbolData DecodeSymbolAddress( uint64_t ptr ) +CallstackSymbolData DecodeSymbolAddress( uint64_t ptr ) { - SymbolData sym; + CallstackSymbolData sym; IMAGEHLP_LINE64 line; DWORD displacement = 0; line.SizeOfStruct = sizeof(IMAGEHLP_LINE64); @@ -242,9 +242,9 @@ SymbolData DecodeSymbolAddress( uint64_t ptr ) return sym; } -SymbolData DecodeCodeAddress( uint64_t ptr ) +CallstackSymbolData DecodeCodeAddress( uint64_t ptr ) { - SymbolData sym; + CallstackSymbolData sym; const auto proc = GetCurrentProcess(); bool done = false; @@ -442,14 +442,10 @@ const char* DecodeCallstackPtrFast( uint64_t ptr ) static int SymbolAddressDataCb( void* data, uintptr_t pc, uintptr_t lowaddr, const char* fn, int lineno, const char* function ) { - auto& sym = *(SymbolData*)data; + auto& sym = *(CallstackSymbolData*)data; if( !fn ) { - const char* symloc = nullptr; - Dl_info dlinfo; - if( dladdr( 
(void*)pc, &dlinfo ) ) symloc = dlinfo.dli_fname; - if( !symloc ) symloc = "[unknown]"; - sym.file = symloc; + sym.file = "[unknown]"; sym.line = 0; sym.needFree = false; } @@ -465,20 +461,20 @@ static int SymbolAddressDataCb( void* data, uintptr_t pc, uintptr_t lowaddr, con static void SymbolAddressErrorCb( void* data, const char* /*msg*/, int /*errnum*/ ) { - auto& sym = *(SymbolData*)data; + auto& sym = *(CallstackSymbolData*)data; sym.file = "[unknown]"; sym.line = 0; sym.needFree = false; } -SymbolData DecodeSymbolAddress( uint64_t ptr ) +CallstackSymbolData DecodeSymbolAddress( uint64_t ptr ) { - SymbolData sym; + CallstackSymbolData sym; backtrace_pcinfo( cb_bts, ptr, SymbolAddressDataCb, SymbolAddressErrorCb, &sym ); return sym; } -SymbolData DecodeCodeAddress( uint64_t ptr ) +CallstackSymbolData DecodeCodeAddress( uint64_t ptr ) { return DecodeSymbolAddress( ptr ); } @@ -494,14 +490,12 @@ static int CallstackDataCb( void* /*data*/, uintptr_t pc, uintptr_t lowaddr, con if( !fn && !function ) { const char* symname = nullptr; - const char* symloc = nullptr; auto vptr = (void*)pc; ptrdiff_t symoff = 0; Dl_info dlinfo; if( dladdr( vptr, &dlinfo ) ) { - symloc = dlinfo.dli_fname; symname = dlinfo.dli_sname; symoff = (char*)pc - (char*)dlinfo.dli_saddr; @@ -518,7 +512,6 @@ static int CallstackDataCb( void* /*data*/, uintptr_t pc, uintptr_t lowaddr, con } if( !symname ) symname = "[unknown]"; - if( !symloc ) symloc = "[unknown]"; if( symoff == 0 ) { @@ -536,15 +529,7 @@ static int CallstackDataCb( void* /*data*/, uintptr_t pc, uintptr_t lowaddr, con cb_data[cb_num].name = name; } - char buf[32]; - const auto addrlen = sprintf( buf, " [%p]", (void*)pc ); - const auto loclen = strlen( symloc ); - auto loc = (char*)tracy_malloc( loclen + addrlen + 1 ); - memcpy( loc, symloc, loclen ); - memcpy( loc + loclen, buf, addrlen ); - loc[loclen + addrlen] = '\0'; - cb_data[cb_num].file = loc; - + cb_data[cb_num].file = CopyString( "[unknown]" ); cb_data[cb_num].line = 0; } 
else @@ -652,16 +637,16 @@ const char* DecodeCallstackPtrFast( uint64_t ptr ) return ret; } -SymbolData DecodeSymbolAddress( uint64_t ptr ) +CallstackSymbolData DecodeSymbolAddress( uint64_t ptr ) { const char* symloc = nullptr; Dl_info dlinfo; if( dladdr( (void*)ptr, &dlinfo ) ) symloc = dlinfo.dli_fname; if( !symloc ) symloc = "[unknown]"; - return SymbolData { symloc, 0, false }; + return CallstackSymbolData { symloc, 0, false }; } -SymbolData DecodeCodeAddress( uint64_t ptr ) +CallstackSymbolData DecodeCodeAddress( uint64_t ptr ) { return DecodeSymbolAddress( ptr ); } @@ -717,15 +702,7 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr ) cb.name = name; } - char buf[32]; - const auto addrlen = sprintf( buf, " [%p]", (void*)ptr ); - const auto loclen = strlen( symloc ); - auto loc = (char*)tracy_malloc( loclen + addrlen + 1 ); - memcpy( loc, symloc, loclen ); - memcpy( loc + loclen, buf, addrlen ); - loc[loclen + addrlen] = '\0'; - cb.file = loc; - + cb.file = CopyString( "[unknown]" ); cb.symLen = 0; cb.symAddr = (uint64_t)symaddr; diff --git a/deps/tracy/client/TracyCallstack.hpp b/deps/tracy/client/TracyCallstack.hpp index c5c2059..6b5e702 100644 --- a/deps/tracy/client/TracyCallstack.hpp +++ b/deps/tracy/client/TracyCallstack.hpp @@ -22,7 +22,7 @@ namespace tracy { -struct SymbolData +struct CallstackSymbolData { const char* file; uint32_t line; @@ -45,8 +45,8 @@ struct CallstackEntryData const char* imageName; }; -SymbolData DecodeSymbolAddress( uint64_t ptr ); -SymbolData DecodeCodeAddress( uint64_t ptr ); +CallstackSymbolData DecodeSymbolAddress( uint64_t ptr ); +CallstackSymbolData DecodeCodeAddress( uint64_t ptr ); const char* DecodeCallstackPtrFast( uint64_t ptr ); CallstackEntryData DecodeCallstackPtr( uint64_t ptr ); void InitCallstack(); diff --git a/deps/tracy/client/TracyDxt1.cpp b/deps/tracy/client/TracyDxt1.cpp index 69503de..f1fb1e4 100644 --- a/deps/tracy/client/TracyDxt1.cpp +++ b/deps/tracy/client/TracyDxt1.cpp @@ -172,6 +172,12 @@ 
static tracy_force_inline uint64_t ProcessRGB( const uint8_t* src ) return uint64_t( to565( src[0], src[1], src[2] ) ) << 16; } + __m128i amask = _mm_set1_epi32( 0xFFFFFF ); + px0 = _mm_and_si128( px0, amask ); + px1 = _mm_and_si128( px1, amask ); + px2 = _mm_and_si128( px2, amask ); + px3 = _mm_and_si128( px3, amask ); + __m128i min0 = _mm_min_epu8( px0, px1 ); __m128i min1 = _mm_min_epu8( px2, px3 ); __m128i min2 = _mm_min_epu8( min0, min1 ); @@ -492,6 +498,12 @@ static tracy_force_inline void ProcessRGB_AVX( const uint8_t* src, char*& dst ) return; } + __m256i amask = _mm256_set1_epi32( 0xFFFFFF ); + px0 = _mm256_and_si256( px0, amask ); + px1 = _mm256_and_si256( px1, amask ); + px2 = _mm256_and_si256( px2, amask ); + px3 = _mm256_and_si256( px3, amask ); + __m256i min0 = _mm256_min_epu8( px0, px1 ); __m256i min1 = _mm256_min_epu8( px2, px3 ); __m256i min2 = _mm256_min_epu8( min0, min1 ); diff --git a/deps/tracy/client/TracyLock.hpp b/deps/tracy/client/TracyLock.hpp index 5cda66d..e513cdc 100644 --- a/deps/tracy/client/TracyLock.hpp +++ b/deps/tracy/client/TracyLock.hpp @@ -23,7 +23,8 @@ public: { assert( m_id != std::numeric_limits::max() ); - TracyLfqPrepare( QueueType::LockAnnounce ); + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::LockAnnounce ); MemWrite( &item->lockAnnounce.id, m_id ); MemWrite( &item->lockAnnounce.time, Profiler::GetTime() ); MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc ); @@ -31,7 +32,7 @@ public: #ifdef TRACY_ON_DEMAND GetProfiler().DeferItem( *item ); #endif - TracyLfqCommit; + Profiler::QueueSerialFinish(); } LockableCtx( const LockableCtx& ) = delete; @@ -39,14 +40,14 @@ public: tracy_force_inline ~LockableCtx() { - TracyLfqPrepare( QueueType::LockTerminate ); + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::LockTerminate ); MemWrite( &item->lockTerminate.id, m_id ); MemWrite( &item->lockTerminate.time, Profiler::GetTime() ); - MemWrite( &item->lockTerminate.type, 
LockType::Lockable ); #ifdef TRACY_ON_DEMAND GetProfiler().DeferItem( *item ); #endif - TracyLfqCommit; + Profiler::QueueSerialFinish(); } tracy_force_inline bool BeforeLock() @@ -69,7 +70,6 @@ public: MemWrite( &item->lockWait.thread, GetThreadHandle() ); MemWrite( &item->lockWait.id, m_id ); MemWrite( &item->lockWait.time, Profiler::GetTime() ); - MemWrite( &item->lockWait.type, LockType::Lockable ); Profiler::QueueSerialFinish(); return true; } @@ -155,16 +155,18 @@ public: tracy_force_inline void CustomName( const char* name, size_t size ) { - auto ptr = (char*)tracy_malloc( size+1 ); + assert( size < std::numeric_limits::max() ); + auto ptr = (char*)tracy_malloc( size ); memcpy( ptr, name, size ); - ptr[size] = '\0'; - TracyLfqPrepare( QueueType::LockName ); - MemWrite( &item->lockName.id, m_id ); - MemWrite( &item->lockName.name, (uint64_t)ptr ); + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::LockName ); + MemWrite( &item->lockNameFat.id, m_id ); + MemWrite( &item->lockNameFat.name, (uint64_t)ptr ); + MemWrite( &item->lockNameFat.size, (uint16_t)size ); #ifdef TRACY_ON_DEMAND GetProfiler().DeferItem( *item ); #endif - TracyLfqCommit; + Profiler::QueueSerialFinish(); } private: @@ -236,17 +238,16 @@ public: { assert( m_id != std::numeric_limits::max() ); - TracyLfqPrepare( QueueType::LockAnnounce ); + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::LockAnnounce ); MemWrite( &item->lockAnnounce.id, m_id ); MemWrite( &item->lockAnnounce.time, Profiler::GetTime() ); MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc ); MemWrite( &item->lockAnnounce.type, LockType::SharedLockable ); - #ifdef TRACY_ON_DEMAND GetProfiler().DeferItem( *item ); #endif - - TracyLfqCommit; + Profiler::QueueSerialFinish(); } SharedLockableCtx( const SharedLockableCtx& ) = delete; @@ -254,16 +255,14 @@ public: tracy_force_inline ~SharedLockableCtx() { - TracyLfqPrepare( QueueType::LockTerminate ); + auto item = 
Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::LockTerminate ); MemWrite( &item->lockTerminate.id, m_id ); MemWrite( &item->lockTerminate.time, Profiler::GetTime() ); - MemWrite( &item->lockTerminate.type, LockType::SharedLockable ); - #ifdef TRACY_ON_DEMAND GetProfiler().DeferItem( *item ); #endif - - TracyLfqCommit; + Profiler::QueueSerialFinish(); } tracy_force_inline bool BeforeLock() @@ -286,7 +285,6 @@ public: MemWrite( &item->lockWait.thread, GetThreadHandle() ); MemWrite( &item->lockWait.id, m_id ); MemWrite( &item->lockWait.time, Profiler::GetTime() ); - MemWrite( &item->lockWait.type, LockType::SharedLockable ); Profiler::QueueSerialFinish(); return true; } @@ -369,7 +367,6 @@ public: MemWrite( &item->lockWait.thread, GetThreadHandle() ); MemWrite( &item->lockWait.id, m_id ); MemWrite( &item->lockWait.time, Profiler::GetTime() ); - MemWrite( &item->lockWait.type, LockType::SharedLockable ); Profiler::QueueSerialFinish(); return true; } @@ -455,16 +452,18 @@ public: tracy_force_inline void CustomName( const char* name, size_t size ) { - auto ptr = (char*)tracy_malloc( size+1 ); + assert( size < std::numeric_limits::max() ); + auto ptr = (char*)tracy_malloc( size ); memcpy( ptr, name, size ); - ptr[size] = '\0'; - TracyLfqPrepare( QueueType::LockName ); - MemWrite( &item->lockName.id, m_id ); - MemWrite( &item->lockName.name, (uint64_t)ptr ); + auto item = Profiler::QueueSerial(); + MemWrite( &item->hdr.type, QueueType::LockName ); + MemWrite( &item->lockNameFat.id, m_id ); + MemWrite( &item->lockNameFat.name, (uint64_t)ptr ); + MemWrite( &item->lockNameFat.size, (uint16_t)size ); #ifdef TRACY_ON_DEMAND GetProfiler().DeferItem( *item ); #endif - TracyLfqCommit; + Profiler::QueueSerialFinish(); } private: diff --git a/deps/tracy/client/TracyProfiler.cpp b/deps/tracy/client/TracyProfiler.cpp index ba0e72f..2484754 100644 --- a/deps/tracy/client/TracyProfiler.cpp +++ b/deps/tracy/client/TracyProfiler.cpp @@ -37,6 +37,10 @@ # include #endif 
+#if defined __APPLE__ +# include "TargetConditionals.h" +#endif + #include #include #include @@ -80,11 +84,6 @@ # endif #endif -#if defined TRACY_HW_TIMER && __ARM_ARCH >= 6 && !defined TARGET_OS_IOS -# include -# include -#endif - #if defined _WIN32 || defined __CYGWIN__ # include extern "C" typedef LONG (WINAPI *t_RtlGetVersion)( PRTL_OSVERSIONINFOW ); @@ -93,9 +92,6 @@ extern "C" typedef BOOL (WINAPI *t_GetLogicalProcessorInformationEx)( LOGICAL_PR # include # include #endif -#if defined __APPLE__ -# include "TargetConditionals.h" -#endif #if defined __linux__ # include # include @@ -231,7 +227,11 @@ static int64_t SetupHwTimer() const char* noCheck = getenv( "TRACY_NO_INVARIANT_CHECK" ); if( !noCheck || noCheck[0] != '1' ) { +#if defined _WIN32 || defined __CYGWIN__ InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*.\nAlternatively you may rebuild the application with the TRACY_TIMER_QPC define to use lower resolution timer." ); +#else + InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*." 
); +#endif } } #endif @@ -270,6 +270,7 @@ static const char* GetProcessName() return processName; } +#if defined __linux__ && defined __ARM_ARCH static uint32_t GetHex( char*& ptr, int skip ) { uint32_t ret; @@ -287,6 +288,7 @@ static uint32_t GetHex( char*& ptr, int skip ) ptr = end; return ret; } +#endif static const char* GetHostInfo() { @@ -540,6 +542,8 @@ static char s_crashText[1024]; LONG WINAPI CrashFilter( PEXCEPTION_POINTERS pExp ) { + if( !GetProfiler().IsConnected() ) return EXCEPTION_CONTINUE_SEARCH; + const unsigned ec = pExp->ExceptionRecord->ExceptionCode; auto msgPtr = s_crashText; switch( ec ) @@ -590,12 +594,12 @@ LONG WINAPI CrashFilter( PEXCEPTION_POINTERS pExp ) } { + GetProfiler().SendCallstack( 60, "KiUserExceptionDispatcher" ); + TracyLfqPrepare( QueueType::CrashReport ); item->crashReport.time = Profiler::GetTime(); item->crashReport.text = (uint64_t)s_crashText; TracyLfqCommit; - - GetProfiler().SendCallstack( 60, "KiUserExceptionDispatcher" ); } HANDLE h = CreateToolhelp32Snapshot( TH32CS_SNAPTHREAD, 0 ); @@ -822,12 +826,12 @@ static void CrashHandler( int signal, siginfo_t* info, void* /*ucontext*/ ) } { + GetProfiler().SendCallstack( 60, "__kernel_rt_sigreturn" ); + TracyLfqPrepare( QueueType::CrashReport ); item->crashReport.time = Profiler::GetTime(); item->crashReport.text = (uint64_t)s_crashText; TracyLfqCommit; - - GetProfiler().SendCallstack( 60, "__kernel_rt_sigreturn" ); } DIR* dp = opendir( "/proc/self/task" ); @@ -863,7 +867,7 @@ static void CrashHandler( int signal, siginfo_t* info, void* /*ucontext*/ ) enum { QueuePrealloc = 256 * 1024 }; -static Profiler* s_instance; +static Profiler* s_instance = nullptr; static Thread* s_thread; static Thread* s_compressThread; @@ -871,6 +875,19 @@ static Thread* s_compressThread; static Thread* s_sysTraceThread = nullptr; #endif +TRACY_API bool ProfilerAvailable() { return s_instance != nullptr; } + +TRACY_API int64_t GetFrequencyQpc() +{ +#if defined _WIN32 || defined __CYGWIN__ + 
LARGE_INTEGER t; + QueryPerformanceFrequency( &t ); + return t.QuadPart; +#else + return 0; +#endif +} + #ifdef TRACY_DELAYED_INIT struct ThreadNameData; TRACY_API moodycamel::ConcurrentQueue& GetQueue(); @@ -911,6 +928,25 @@ struct ProfilerThreadData # endif }; +# ifdef TRACY_MANUAL_LIFETIME +ProfilerData* s_profilerData = nullptr; +TRACY_API void StartupProfiler() +{ + s_profilerData = new ProfilerData; + s_profilerData->profiler.SpawnWorkerThreads(); +} +static ProfilerData& GetProfilerData() +{ + assert(s_profilerData); + return *s_profilerData; +} +TRACY_API void ShutdownProfiler() +{ + delete s_profilerData; + s_profilerData = nullptr; + rpmalloc_finalize(); +} +# else static std::atomic profilerDataLock { 0 }; static std::atomic profilerData { nullptr }; @@ -932,6 +968,7 @@ static ProfilerData& GetProfilerData() } return *ptr; } +# endif static ProfilerThreadData& GetProfilerThreadData() { @@ -953,10 +990,12 @@ std::atomic& GetThreadNameData() { return GetProfilerData().thr TRACY_API LuaZoneState& GetLuaZoneState() { return GetProfilerThreadData().luaZoneState; } # endif +# ifndef TRACY_MANUAL_LIFETIME namespace { const auto& __profiler_init = GetProfiler(); } +# endif #else TRACY_API void InitRPMallocThread() @@ -1044,8 +1083,8 @@ Profiler::Profiler() , m_fiQueue( 16 ) , m_fiDequeue( 16 ) , m_frameCount( 0 ) -#ifdef TRACY_ON_DEMAND , m_isConnected( false ) +#ifdef TRACY_ON_DEMAND , m_connectionId( 0 ) , m_deferredQueue( 64*1024 ) #endif @@ -1081,6 +1120,13 @@ Profiler::Profiler() m_userPort = atoi( userPort ); } +#if !defined(TRACY_DELAYED_INIT) || !defined(TRACY_MANUAL_LIFETIME) + SpawnWorkerThreads(); +#endif +} + +void Profiler::SpawnWorkerThreads() +{ s_thread = (Thread*)tracy_malloc( sizeof( Thread ) ); new(s_thread) Thread( LaunchWorker, this ); @@ -1172,6 +1218,8 @@ void Profiler::Worker() s_profilerTid = syscall( SYS_gettid ); #endif + ThreadExitHandler threadExitHandler; + SetThreadName( "Tracy Profiler" ); #ifdef TRACY_DATA_PORT @@ -1223,6 
+1271,12 @@ void Profiler::Worker() uint8_t cpuArch = CpuArchUnknown; #endif +#ifdef TRACY_NO_CODE_TRANSFER + uint8_t codeTransfer = 0; +#else + uint8_t codeTransfer = 1; +#endif + #if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 uint32_t regs[4]; char manufacturer[12]; @@ -1250,6 +1304,7 @@ void Profiler::Worker() MemWrite( &welcome.onDemand, onDemand ); MemWrite( &welcome.isApple, isApple ); MemWrite( &welcome.cpuArch, cpuArch ); + MemWrite( &welcome.codeTransfer, codeTransfer ); memcpy( welcome.cpuManufacturer, manufacturer, 12 ); MemWrite( &welcome.cpuId, cpuId ); memcpy( welcome.programName, procname, pnsz ); @@ -1318,6 +1373,11 @@ void Profiler::Worker() #ifndef TRACY_NO_EXIT if( !m_noExit && ShouldExit() ) { + if( m_broadcast ) + { + broadcastMsg.activeTime = -1; + m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen ); + } m_shutdownFinished.store( true, std::memory_order_relaxed ); return; } @@ -1335,12 +1395,20 @@ void Profiler::Worker() { lastBroadcast = t; const auto ts = std::chrono::duration_cast( std::chrono::system_clock::now().time_since_epoch() ).count(); - broadcastMsg.activeTime = uint32_t( ts - m_epoch ); + broadcastMsg.activeTime = int32_t( ts - m_epoch ); + assert( broadcastMsg.activeTime >= 0 ); m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen ); } } } + if( m_broadcast ) + { + lastBroadcast = 0; + broadcastMsg.activeTime = -1; + m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen ); + } + // Handshake { char shibboleth[HandshakeShibbolethSize]; @@ -1378,8 +1446,8 @@ void Profiler::Worker() const auto currentTime = GetTime(); ClearQueues( token ); m_connectionId.fetch_add( 1, std::memory_order_release ); - m_isConnected.store( true, std::memory_order_release ); #endif + m_isConnected.store( true, std::memory_order_release ); HandshakeStatus handshake = HandshakeWelcome; m_sock->Send( &handshake, sizeof( handshake ) ); @@ -1403,16 +1471,19 @@ void Profiler::Worker() for( auto& item 
: m_deferredQueue ) { uint64_t ptr; + uint16_t size; const auto idx = MemRead( &item.hdr.idx ); switch( (QueueType)idx ) { case QueueType::MessageAppInfo: - ptr = MemRead( &item.message.text ); - SendString( ptr, (const char*)ptr, QueueType::CustomStringData ); + ptr = MemRead( &item.messageFat.text ); + size = MemRead( &item.messageFat.size ); + SendSingleString( (const char*)ptr, size ); break; case QueueType::LockName: - ptr = MemRead( &item.lockName.name ); - SendString( ptr, (const char*)ptr, QueueType::CustomStringData ); + ptr = MemRead( &item.lockNameFat.name ); + size = MemRead( &item.lockNameFat.size ); + SendSingleString( (const char*)ptr, size ); break; default: break; @@ -1469,8 +1540,8 @@ void Profiler::Worker() } if( ShouldExit() ) break; -#ifdef TRACY_ON_DEMAND m_isConnected.store( false, std::memory_order_release ); +#ifdef TRACY_ON_DEMAND m_bufferOffset = 0; m_bufferStart = 0; #endif @@ -1592,6 +1663,8 @@ void Profiler::Worker() void Profiler::CompressWorker() { + ThreadExitHandler threadExitHandler; + SetThreadName( "Tracy DXT1" ); while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); rpmalloc_thread_initialize(); @@ -1631,12 +1704,12 @@ void Profiler::CompressWorker() tracy_free( fi->image ); TracyLfqPrepare( QueueType::FrameImage ); - MemWrite( &item->frameImage.image, (uint64_t)etc1buf ); - MemWrite( &item->frameImage.frame, fi->frame ); - MemWrite( &item->frameImage.w, w ); - MemWrite( &item->frameImage.h, h ); + MemWrite( &item->frameImageFat.image, (uint64_t)etc1buf ); + MemWrite( &item->frameImageFat.frame, fi->frame ); + MemWrite( &item->frameImageFat.w, w ); + MemWrite( &item->frameImageFat.h, h ); uint8_t flip = fi->flip; - MemWrite( &item->frameImage.flip, flip ); + MemWrite( &item->frameImageFat.flip, flip ); TracyLfqCommit; fi++; @@ -1664,7 +1737,7 @@ static void FreeAssociatedMemory( const QueueItem& item ) { case QueueType::ZoneText: case QueueType::ZoneName: - 
ptr = MemRead( &item.zoneText.text ); + ptr = MemRead( &item.zoneTextFat.text ); tracy_free( (void*)ptr ); break; case QueueType::Message: @@ -1674,7 +1747,7 @@ static void FreeAssociatedMemory( const QueueItem& item ) #ifndef TRACY_ON_DEMAND case QueueType::MessageAppInfo: #endif - ptr = MemRead( &item.message.text ); + ptr = MemRead( &item.messageFat.text ); tracy_free( (void*)ptr ); break; case QueueType::ZoneBeginAllocSrcLoc: @@ -1683,30 +1756,27 @@ static void FreeAssociatedMemory( const QueueItem& item ) tracy_free( (void*)ptr ); break; case QueueType::CallstackMemory: - ptr = MemRead( &item.callstackMemory.ptr ); - tracy_free( (void*)ptr ); - break; case QueueType::Callstack: - ptr = MemRead( &item.callstack.ptr ); + ptr = MemRead( &item.callstackFat.ptr ); tracy_free( (void*)ptr ); break; case QueueType::CallstackAlloc: - ptr = MemRead( &item.callstackAlloc.nativePtr ); + ptr = MemRead( &item.callstackAllocFat.nativePtr ); tracy_free( (void*)ptr ); - ptr = MemRead( &item.callstackAlloc.ptr ); + ptr = MemRead( &item.callstackAllocFat.ptr ); tracy_free( (void*)ptr ); break; case QueueType::CallstackSample: - ptr = MemRead( &item.callstackSample.ptr ); + ptr = MemRead( &item.callstackSampleFat.ptr ); tracy_free( (void*)ptr ); break; case QueueType::FrameImage: - ptr = MemRead( &item.frameImage.image ); + ptr = MemRead( &item.frameImageFat.image ); tracy_free( (void*)ptr ); break; #ifndef TRACY_ON_DEMAND case QueueType::LockName: - ptr = MemRead( &item.lockName.name ); + ptr = MemRead( &item.lockNameFat.name ); tracy_free( (void*)ptr ); break; #endif @@ -1779,6 +1849,7 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) while( sz-- > 0 ) { uint64_t ptr; + uint16_t size; auto idx = MemRead( &item->hdr.idx ); if( idx < (int)QueueType::Terminate ) { @@ -1786,21 +1857,29 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) { case QueueType::ZoneText: case QueueType::ZoneName: - ptr = MemRead( 
&item->zoneText.text ); - SendString( ptr, (const char*)ptr, QueueType::CustomStringData ); + ptr = MemRead( &item->zoneTextFat.text ); + size = MemRead( &item->zoneTextFat.size ); + SendSingleString( (const char*)ptr, size ); tracy_free( (void*)ptr ); break; case QueueType::Message: - case QueueType::MessageColor: case QueueType::MessageCallstack: + ptr = MemRead( &item->messageFat.text ); + size = MemRead( &item->messageFat.size ); + SendSingleString( (const char*)ptr, size ); + tracy_free( (void*)ptr ); + break; + case QueueType::MessageColor: case QueueType::MessageColorCallstack: - ptr = MemRead( &item->message.text ); - SendString( ptr, (const char*)ptr, QueueType::CustomStringData ); + ptr = MemRead( &item->messageColorFat.text ); + size = MemRead( &item->messageColorFat.size ); + SendSingleString( (const char*)ptr, size ); tracy_free( (void*)ptr ); break; case QueueType::MessageAppInfo: - ptr = MemRead( &item->message.text ); - SendString( ptr, (const char*)ptr, QueueType::CustomStringData ); + ptr = MemRead( &item->messageFat.text ); + size = MemRead( &item->messageFat.size ); + SendSingleString( (const char*)ptr, size ); #ifndef TRACY_ON_DEMAND tracy_free( (void*)ptr ); #endif @@ -1815,54 +1894,44 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) ptr = MemRead( &item->zoneBegin.srcloc ); SendSourceLocationPayload( ptr ); tracy_free( (void*)ptr ); - idx++; - MemWrite( &item->hdr.idx, idx ); break; } case QueueType::Callstack: - ptr = MemRead( &item->callstack.ptr ); + ptr = MemRead( &item->callstackFat.ptr ); SendCallstackPayload( ptr ); tracy_free( (void*)ptr ); - idx++; - MemWrite( &item->hdr.idx, idx ); break; case QueueType::CallstackAlloc: - ptr = MemRead( &item->callstackAlloc.nativePtr ); + ptr = MemRead( &item->callstackAllocFat.nativePtr ); if( ptr != 0 ) { CutCallstack( (void*)ptr, "lua_pcall" ); SendCallstackPayload( ptr ); tracy_free( (void*)ptr ); } - ptr = MemRead( &item->callstackAlloc.ptr ); + ptr = MemRead( 
&item->callstackAllocFat.ptr ); SendCallstackAlloc( ptr ); tracy_free( (void*)ptr ); - idx++; - MemWrite( &item->hdr.idx, idx ); break; case QueueType::CallstackSample: { - ptr = MemRead( &item->callstackSample.ptr ); + ptr = MemRead( &item->callstackSampleFat.ptr ); SendCallstackPayload64( ptr ); tracy_free( (void*)ptr ); - int64_t t = MemRead( &item->callstackSample.time ); + int64_t t = MemRead( &item->callstackSampleFat.time ); int64_t dt = t - refCtx; refCtx = t; - MemWrite( &item->callstackSample.time, dt ); - idx++; - MemWrite( &item->hdr.idx, idx ); + MemWrite( &item->callstackSampleFat.time, dt ); break; } case QueueType::FrameImage: { - ptr = MemRead( &item->frameImage.image ); - const auto w = MemRead( &item->frameImage.w ); - const auto h = MemRead( &item->frameImage.h ); + ptr = MemRead( &item->frameImageFat.image ); + const auto w = MemRead( &item->frameImageFat.w ); + const auto h = MemRead( &item->frameImageFat.h ); const auto csz = size_t( w * h / 2 ); SendLongString( ptr, (const char*)ptr, csz, QueueType::FrameImageData ); tracy_free( (void*)ptr ); - idx++; - MemWrite( &item->hdr.idx, idx ); break; } case QueueType::ZoneBegin: @@ -1882,13 +1951,6 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token ) MemWrite( &item->zoneEnd.time, dt ); break; } - case QueueType::LockName: - ptr = MemRead( &item->lockName.name ); - SendString( ptr, (const char*)ptr, QueueType::CustomStringData ); -#ifndef TRACY_ON_DEMAND - tracy_free( (void*)ptr ); -#endif - break; case QueueType::GpuZoneBegin: case QueueType::GpuZoneBeginCallstack: { @@ -2056,11 +2118,9 @@ Profiler::DequeueStatus Profiler::DequeueSerial() switch( (QueueType)idx ) { case QueueType::CallstackMemory: - ptr = MemRead( &item->callstackMemory.ptr ); + ptr = MemRead( &item->callstackFat.ptr ); SendCallstackPayload( ptr ); tracy_free( (void*)ptr ); - idx++; - MemWrite( &item->hdr.idx, idx ); break; case QueueType::LockWait: case QueueType::LockSharedWait: @@ -2089,8 +2149,20 @@ 
Profiler::DequeueStatus Profiler::DequeueSerial() MemWrite( &item->lockRelease.time, dt ); break; } + case QueueType::LockName: + { + ptr = MemRead( &item->lockNameFat.name ); + uint16_t size = MemRead( &item->lockNameFat.size ); + SendSingleString( (const char*)ptr, size ); +#ifndef TRACY_ON_DEMAND + tracy_free( (void*)ptr ); +#endif + break; + } case QueueType::MemAlloc: + case QueueType::MemAllocNamed: case QueueType::MemAllocCallstack: + case QueueType::MemAllocCallstackNamed: { int64_t t = MemRead( &item->memAlloc.time ); int64_t dt = t - refSerial; @@ -2099,7 +2171,9 @@ Profiler::DequeueStatus Profiler::DequeueSerial() break; } case QueueType::MemFree: + case QueueType::MemFreeNamed: case QueueType::MemFreeCallstack: + case QueueType::MemFreeCallstackNamed: { int64_t t = MemRead( &item->memFree.time ); int64_t dt = t - refSerial; @@ -2166,11 +2240,10 @@ bool Profiler::SendData( const char* data, size_t len ) return m_sock->Send( m_lz4Buf, lz4sz + sizeof( lz4sz_t ) ) != -1; } -void Profiler::SendString( uint64_t str, const char* ptr, QueueType type ) +void Profiler::SendString( uint64_t str, const char* ptr, size_t len, QueueType type ) { assert( type == QueueType::StringData || type == QueueType::ThreadName || - type == QueueType::CustomStringData || type == QueueType::PlotName || type == QueueType::FrameName || type == QueueType::ExternalName || @@ -2180,7 +2253,6 @@ void Profiler::SendString( uint64_t str, const char* ptr, QueueType type ) MemWrite( &item.hdr.type, type ); MemWrite( &item.stringTransfer.ptr, str ); - auto len = strlen( ptr ); assert( len <= std::numeric_limits::max() ); auto l16 = uint16_t( len ); @@ -2191,6 +2263,36 @@ void Profiler::SendString( uint64_t str, const char* ptr, QueueType type ) AppendDataUnsafe( ptr, l16 ); } +void Profiler::SendSingleString( const char* ptr, size_t len ) +{ + QueueItem item; + MemWrite( &item.hdr.type, QueueType::SingleStringData ); + + assert( len <= std::numeric_limits::max() ); + auto l16 = uint16_t( len 
); + + NeedDataSize( QueueDataSize[(int)QueueType::SingleStringData] + sizeof( l16 ) + l16 ); + + AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SingleStringData] ); + AppendDataUnsafe( &l16, sizeof( l16 ) ); + AppendDataUnsafe( ptr, l16 ); +} + +void Profiler::SendSecondString( const char* ptr, size_t len ) +{ + QueueItem item; + MemWrite( &item.hdr.type, QueueType::SecondStringData ); + + assert( len <= std::numeric_limits::max() ); + auto l16 = uint16_t( len ); + + NeedDataSize( QueueDataSize[(int)QueueType::SecondStringData] + sizeof( l16 ) + l16 ); + + AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SecondStringData] ); + AppendDataUnsafe( &l16, sizeof( l16 ) ); + AppendDataUnsafe( ptr, l16 ); +} + void Profiler::SendLongString( uint64_t str, const char* ptr, size_t len, QueueType type ) { assert( type == QueueType::FrameImageData || @@ -2234,16 +2336,17 @@ void Profiler::SendSourceLocationPayload( uint64_t _ptr ) MemWrite( &item.hdr.type, QueueType::SourceLocationPayload ); MemWrite( &item.stringTransfer.ptr, _ptr ); - const auto len = *((uint32_t*)ptr); - assert( len <= std::numeric_limits::max() ); - assert( len > 4 ); - const auto l16 = uint16_t( len - 4 ); + uint16_t len; + memcpy( &len, ptr, sizeof( len ) ); + assert( len > 2 ); + len -= 2; + ptr += 2; - NeedDataSize( QueueDataSize[(int)QueueType::SourceLocationPayload] + sizeof( l16 ) + l16 ); + NeedDataSize( QueueDataSize[(int)QueueType::SourceLocationPayload] + sizeof( len ) + len ); AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SourceLocationPayload] ); - AppendDataUnsafe( &l16, sizeof( l16 ) ); - AppendDataUnsafe( ptr + 4, l16 ); + AppendDataUnsafe( &len, sizeof( len ) ); + AppendDataUnsafe( ptr, len ); } void Profiler::SendCallstackPayload( uint64_t _ptr ) @@ -2304,15 +2407,15 @@ void Profiler::SendCallstackAlloc( uint64_t _ptr ) MemWrite( &item.hdr.type, QueueType::CallstackAllocPayload ); MemWrite( &item.stringTransfer.ptr, _ptr ); - const auto len = *((uint32_t*)ptr); - 
assert( len <= std::numeric_limits::max() ); - const auto l16 = uint16_t( len ); + uint16_t len; + memcpy( &len, ptr, 2 ); + ptr += 2; - NeedDataSize( QueueDataSize[(int)QueueType::CallstackAllocPayload] + sizeof( l16 ) + l16 ); + NeedDataSize( QueueDataSize[(int)QueueType::CallstackAllocPayload] + sizeof( len ) + len ); AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackAllocPayload] ); - AppendDataUnsafe( &l16, sizeof( l16 ) ); - AppendDataUnsafe( ptr + 4, l16 ); + AppendDataUnsafe( &len, sizeof( len ) ); + AppendDataUnsafe( ptr, len ); } void Profiler::SendCallstackFrame( uint64_t ptr ) @@ -2321,13 +2424,12 @@ void Profiler::SendCallstackFrame( uint64_t ptr ) const auto frameData = DecodeCallstackPtr( ptr ); { - SendString( uint64_t( frameData.imageName ), frameData.imageName, QueueType::CustomStringData ); + SendSingleString( frameData.imageName ); QueueItem item; MemWrite( &item.hdr.type, QueueType::CallstackFrameSize ); MemWrite( &item.callstackFrameSize.ptr, ptr ); MemWrite( &item.callstackFrameSize.size, frameData.size ); - MemWrite( &item.callstackFrameSize.imageName, (uint64_t)frameData.imageName ); AppendData( &item, QueueDataSize[(int)QueueType::CallstackFrameSize] ); } @@ -2336,23 +2438,14 @@ void Profiler::SendCallstackFrame( uint64_t ptr ) { const auto& frame = frameData.data[i]; - SendString( uint64_t( frame.name ), frame.name, QueueType::CustomStringData ); - SendString( uint64_t( frame.file ), frame.file, QueueType::CustomStringData ); + SendSingleString( frame.name ); + SendSecondString( frame.file ); QueueItem item; MemWrite( &item.hdr.type, QueueType::CallstackFrame ); - MemWrite( &item.callstackFrame.name, (uint64_t)frame.name ); - MemWrite( &item.callstackFrame.file, (uint64_t)frame.file ); MemWrite( &item.callstackFrame.line, frame.line ); MemWrite( &item.callstackFrame.symAddr, frame.symAddr ); - if( frame.symLen > ( 1 << 24 ) ) - { - memset( item.callstackFrame.symLen, 0, 3 ); - } - else - { - memcpy( 
item.callstackFrame.symLen, &frame.symLen, 3 ); - } + MemWrite( &item.callstackFrame.symLen, frame.symLen ); AppendData( &item, QueueDataSize[(int)QueueType::CallstackFrame] ); @@ -2383,7 +2476,7 @@ bool Profiler::HandleServerQuery() case ServerQueryThreadString: if( ptr == m_mainThread ) { - SendString( ptr, "Main thread", QueueType::ThreadName ); + SendString( ptr, "Main thread", 11, QueueType::ThreadName ); } else { @@ -2418,9 +2511,11 @@ bool Profiler::HandleServerQuery() case ServerQuerySymbol: HandleSymbolQuery( ptr ); break; +#ifndef TRACY_NO_CODE_TRANSFER case ServerQuerySymbolCode: HandleSymbolCodeQuery( ptr, extra ); break; +#endif case ServerQueryCodeLocation: SendCodeLocation( ptr ); break; @@ -2517,9 +2612,6 @@ void Profiler::HandleDisconnect() void Profiler::CalibrateTimer() { #ifdef TRACY_HW_TIMER -# if !defined TARGET_OS_IOS && __ARM_ARCH >= 6 - m_timerMul = 1.; -# else std::atomic_signal_fence( std::memory_order_acq_rel ); const auto t0 = std::chrono::high_resolution_clock::now(); const auto r0 = GetTime(); @@ -2534,7 +2626,6 @@ void Profiler::CalibrateTimer() const auto dr = r1 - r0; m_timerMul = double( dt ) / double( dr ); -# endif #else m_timerMul = 1.; #endif @@ -2542,7 +2633,7 @@ void Profiler::CalibrateTimer() void Profiler::CalibrateDelay() { - enum { Iterations = 50000 }; + constexpr int Iterations = 50000; auto mindiff = std::numeric_limits::max(); for( int i=0; icallstack.ptr, (uint64_t)ptr ); + MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); TracyLfqCommit; #endif } @@ -2800,11 +2894,10 @@ void Profiler::HandleSymbolQuery( uint64_t symbol ) #ifdef TRACY_HAS_CALLSTACK const auto sym = DecodeSymbolAddress( symbol ); - SendString( uint64_t( sym.file ), sym.file, QueueType::CustomStringData ); + SendSingleString( sym.file ); QueueItem item; MemWrite( &item.hdr.type, QueueType::SymbolInformation ); - MemWrite( &item.symbolInformation.file, uint64_t( sym.file ) ); MemWrite( &item.symbolInformation.line, sym.line ); MemWrite( 
&item.symbolInformation.symAddr, symbol ); @@ -2824,12 +2917,11 @@ void Profiler::SendCodeLocation( uint64_t ptr ) #ifdef TRACY_HAS_CALLSTACK const auto sym = DecodeCodeAddress( ptr ); - SendString( uint64_t( sym.file ), sym.file, QueueType::CustomStringData ); + SendSingleString( sym.file ); QueueItem item; MemWrite( &item.hdr.type, QueueType::CodeInformation ); MemWrite( &item.codeInformation.ptr, ptr ); - MemWrite( &item.codeInformation.file, uint64_t( sym.file ) ); MemWrite( &item.codeInformation.line, sym.line ); AppendData( &item, QueueDataSize[(int)QueueType::CodeInformation] ); @@ -2900,14 +2992,13 @@ TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___trac TracyLfqCommitC; } #endif + tracy::GetProfiler().SendCallstack( depth ); { TracyLfqPrepareC( tracy::QueueType::ZoneBeginCallstack ); tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); TracyLfqCommitC; } - - tracy::GetProfiler().SendCallstack( depth ); return ctx; } @@ -2966,14 +3057,13 @@ TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srclo TracyLfqCommitC; } #endif + tracy::GetProfiler().SendCallstack( depth ); { TracyLfqPrepareC( tracy::QueueType::ZoneBeginAllocSrcLocCallstack ); tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); tracy::MemWrite( &item->zoneBegin.srcloc, srcloc ); TracyLfqCommitC; } - - tracy::GetProfiler().SendCallstack( depth ); return ctx; } @@ -2996,10 +3086,10 @@ TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx ) TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size ) { + assert( size < std::numeric_limits::max() ); if( !ctx.active ) return; - auto ptr = (char*)tracy::tracy_malloc( size+1 ); + auto ptr = (char*)tracy::tracy_malloc( size ); memcpy( ptr, txt, size ); - ptr[size] = '\0'; #ifndef TRACY_NO_VERIFY { TracyLfqPrepareC( tracy::QueueType::ZoneValidation ); @@ -3009,17 +3099,18 @@ 
TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size #endif { TracyLfqPrepareC( tracy::QueueType::ZoneText ); - tracy::MemWrite( &item->zoneText.text, (uint64_t)ptr ); + tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); + tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size ); TracyLfqCommitC; } } TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size ) { + assert( size < std::numeric_limits::max() ); if( !ctx.active ) return; - auto ptr = (char*)tracy::tracy_malloc( size+1 ); + auto ptr = (char*)tracy::tracy_malloc( size ); memcpy( ptr, txt, size ); - ptr[size] = '\0'; #ifndef TRACY_NO_VERIFY { TracyLfqPrepareC( tracy::QueueType::ZoneValidation ); @@ -3029,7 +3120,8 @@ TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size #endif { TracyLfqPrepareC( tracy::QueueType::ZoneName ); - tracy::MemWrite( &item->zoneText.text, (uint64_t)ptr ); + tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); + tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size ); TracyLfqCommitC; } } @@ -3051,10 +3143,10 @@ TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value ) } } -TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size ) { tracy::Profiler::MemAlloc( ptr, size ); } -TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth ) { tracy::Profiler::MemAllocCallstack( ptr, size, depth ); } -TRACY_API void ___tracy_emit_memory_free( const void* ptr ) { tracy::Profiler::MemFree( ptr ); } -TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth ) { tracy::Profiler::MemFreeCallstack( ptr, depth ); } +TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure ) { tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); } +TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure ) { tracy::Profiler::MemAllocCallstack( 
ptr, size, depth, secure != 0 ); } +TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure ) { tracy::Profiler::MemFree( ptr, secure != 0 ); } +TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure ) { tracy::Profiler::MemFreeCallstack( ptr, depth, secure != 0 ); } TRACY_API void ___tracy_emit_frame_mark( const char* name ) { tracy::Profiler::SendFrameMark( name ); } TRACY_API void ___tracy_emit_frame_mark_start( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart ); } TRACY_API void ___tracy_emit_frame_mark_end( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd ); } @@ -3065,8 +3157,27 @@ TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack ) { tracy: TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, size, color, callstack ); } TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, color, callstack ); } TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ) { tracy::Profiler::MessageAppInfo( txt, size ); } -TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, const char* function ) { return tracy::Profiler::AllocSourceLocation( line, source, function ); } -TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz ) { return tracy::Profiler::AllocSourceLocation( line, source, function, name, nameSz ); } + +TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz ) { + return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz ); +} + +TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t 
sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz ) { + return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); +} + +// thread_locals are not initialized on thread creation. At least on GNU/Linux. Instead they are +// initialized on their first ODR-use. This means that the allocator is not automagically +// initialized every time a thread is created. As thus, expose to the C API users a simple API to +// call every time they create a thread. Here we can then put all sorts of per-thread +// initialization. +TRACY_API void ___tracy_init_thread(void) { +#ifdef TRACY_DELAYED_INIT + (void)tracy::GetProfilerThreadData(); +#else + (void)tracy::s_rpmalloc_thread_init; +#endif +} #ifdef __cplusplus } diff --git a/deps/tracy/client/TracyProfiler.hpp b/deps/tracy/client/TracyProfiler.hpp index 3c4a863..cd7ba79 100644 --- a/deps/tracy/client/TracyProfiler.hpp +++ b/deps/tracy/client/TracyProfiler.hpp @@ -5,6 +5,7 @@ #include #include #include +#include #include "tracy_concurrentqueue.h" #include "TracyCallstack.hpp" @@ -24,11 +25,11 @@ # include #endif -#if defined _WIN32 || defined __CYGWIN__ || ( ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) && !defined __ANDROID__ ) || __ARM_ARCH >= 6 +#if defined _WIN32 || defined __CYGWIN__ || ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) || ( defined TARGET_OS_IOS && TARGET_OS_IOS == 1 ) # define TRACY_HW_TIMER #endif -#if !defined TRACY_HW_TIMER || ( defined __ARM_ARCH && __ARM_ARCH >= 6 && !defined CLOCK_MONOTONIC_RAW ) +#if !defined TRACY_HW_TIMER #include #endif @@ -41,6 +42,10 @@ namespace tracy { +#if defined(TRACY_DELAYED_INIT) && defined(TRACY_MANUAL_LIFETIME) +void StartupProfiler(); +void ShutdownProfiler(); +#endif class GpuCtx; class Profiler; @@ -59,6 +64,8 @@ TRACY_API std::atomic& GetGpuCtxCounter(); TRACY_API GpuCtxWrapper& GetGpuCtx(); TRACY_API uint64_t GetThreadHandle(); 
TRACY_API void InitRPMallocThread(); +TRACY_API bool ProfilerAvailable(); +TRACY_API int64_t GetFrequencyQpc(); struct SourceLocationData { @@ -106,7 +113,7 @@ class Profiler struct FrameImageQueueItem { void* image; - uint64_t frame; + uint32_t frame; uint16_t w; uint16_t h; uint8_t offset; @@ -117,19 +124,13 @@ public: Profiler(); ~Profiler(); + void SpawnWorkerThreads(); + static tracy_force_inline int64_t GetTime() { #ifdef TRACY_HW_TIMER # if defined TARGET_OS_IOS && TARGET_OS_IOS == 1 return mach_absolute_time(); -# elif defined __ARM_ARCH && __ARM_ARCH >= 6 -# ifdef CLOCK_MONOTONIC_RAW - struct timespec ts; - clock_gettime( CLOCK_MONOTONIC_RAW, &ts ); - return int64_t( ts.tv_sec ) * 1000000000ll + int64_t( ts.tv_nsec ); -# else - return std::chrono::duration_cast( std::chrono::high_resolution_clock::now().time_since_epoch() ).count(); -# endif # elif defined _WIN32 || defined __CYGWIN__ # ifdef TRACY_TIMER_QPC return GetTimeQpc(); @@ -144,9 +145,17 @@ public: uint64_t rax, rdx; asm volatile ( "rdtsc" : "=a" (rax), "=d" (rdx) ); return ( rdx << 32 ) + rax; +# else +# error "TRACY_HW_TIMER detection logic needs fixing" # endif #else +# if defined __linux__ && defined CLOCK_MONOTONIC_RAW + struct timespec ts; + clock_gettime( CLOCK_MONOTONIC_RAW, &ts ); + return int64_t( ts.tv_sec ) * 1000000000ll + int64_t( ts.tv_nsec ); +# else return std::chrono::duration_cast( std::chrono::high_resolution_clock::now().time_since_epoch() ).count(); +# endif #endif } @@ -197,6 +206,7 @@ public: static tracy_force_inline void SendFrameImage( const void* image, uint16_t w, uint16_t h, uint8_t offset, bool flip ) { auto& profiler = GetProfiler(); + assert( profiler.m_frameCount.load( std::memory_order_relaxed ) < std::numeric_limits::max() ); #ifdef TRACY_ON_DEMAND if( !profiler.IsConnected() ) return; #endif @@ -207,7 +217,7 @@ public: profiler.m_fiLock.lock(); auto fi = profiler.m_fiQueue.prepare_next(); fi->image = ptr; - fi->frame = profiler.m_frameCount.load( 
std::memory_order_relaxed ) - offset; + fi->frame = uint32_t( profiler.m_frameCount.load( std::memory_order_relaxed ) - offset ); fi->w = w; fi->h = h; fi->flip = flip; @@ -269,18 +279,19 @@ public: static tracy_force_inline void Message( const char* txt, size_t size, int callstack ) { + assert( size < std::numeric_limits::max() ); #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif - TracyLfqPrepare( callstack == 0 ? QueueType::Message : QueueType::MessageCallstack ); - auto ptr = (char*)tracy_malloc( size+1 ); - memcpy( ptr, txt, size ); - ptr[size] = '\0'; - MemWrite( &item->message.time, GetTime() ); - MemWrite( &item->message.text, (uint64_t)ptr ); - TracyLfqCommit; - if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack ); + + TracyLfqPrepare( callstack == 0 ? QueueType::Message : QueueType::MessageCallstack ); + auto ptr = (char*)tracy_malloc( size ); + memcpy( ptr, txt, size ); + MemWrite( &item->messageFat.time, GetTime() ); + MemWrite( &item->messageFat.text, (uint64_t)ptr ); + MemWrite( &item->messageFat.size, (uint16_t)size ); + TracyLfqCommit; } static tracy_force_inline void Message( const char* txt, int callstack ) @@ -288,31 +299,32 @@ public: #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif - TracyLfqPrepare( callstack == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack ); - MemWrite( &item->message.time, GetTime() ); - MemWrite( &item->message.text, (uint64_t)txt ); - TracyLfqCommit; - if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack ); + + TracyLfqPrepare( callstack == 0 ? 
QueueType::MessageLiteral : QueueType::MessageLiteralCallstack ); + MemWrite( &item->messageLiteral.time, GetTime() ); + MemWrite( &item->messageLiteral.text, (uint64_t)txt ); + TracyLfqCommit; } static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int callstack ) { + assert( size < std::numeric_limits::max() ); #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif - TracyLfqPrepare( callstack == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack ); - auto ptr = (char*)tracy_malloc( size+1 ); - memcpy( ptr, txt, size ); - ptr[size] = '\0'; - MemWrite( &item->messageColor.time, GetTime() ); - MemWrite( &item->messageColor.text, (uint64_t)ptr ); - MemWrite( &item->messageColor.r, uint8_t( ( color ) & 0xFF ) ); - MemWrite( &item->messageColor.g, uint8_t( ( color >> 8 ) & 0xFF ) ); - MemWrite( &item->messageColor.b, uint8_t( ( color >> 16 ) & 0xFF ) ); - TracyLfqCommit; - if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack ); + + TracyLfqPrepare( callstack == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack ); + auto ptr = (char*)tracy_malloc( size ); + memcpy( ptr, txt, size ); + MemWrite( &item->messageColorFat.time, GetTime() ); + MemWrite( &item->messageColorFat.text, (uint64_t)ptr ); + MemWrite( &item->messageColorFat.r, uint8_t( ( color ) & 0xFF ) ); + MemWrite( &item->messageColorFat.g, uint8_t( ( color >> 8 ) & 0xFF ) ); + MemWrite( &item->messageColorFat.b, uint8_t( ( color >> 16 ) & 0xFF ) ); + MemWrite( &item->messageColorFat.size, (uint16_t)size ); + TracyLfqCommit; } static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int callstack ) @@ -320,26 +332,27 @@ public: #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif - TracyLfqPrepare( callstack == 0 ? 
QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack ); - MemWrite( &item->messageColor.time, GetTime() ); - MemWrite( &item->messageColor.text, (uint64_t)txt ); - MemWrite( &item->messageColor.r, uint8_t( ( color ) & 0xFF ) ); - MemWrite( &item->messageColor.g, uint8_t( ( color >> 8 ) & 0xFF ) ); - MemWrite( &item->messageColor.b, uint8_t( ( color >> 16 ) & 0xFF ) ); - TracyLfqCommit; - if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack ); + + TracyLfqPrepare( callstack == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack ); + MemWrite( &item->messageColorLiteral.time, GetTime() ); + MemWrite( &item->messageColorLiteral.text, (uint64_t)txt ); + MemWrite( &item->messageColorLiteral.r, uint8_t( ( color ) & 0xFF ) ); + MemWrite( &item->messageColorLiteral.g, uint8_t( ( color >> 8 ) & 0xFF ) ); + MemWrite( &item->messageColorLiteral.b, uint8_t( ( color >> 16 ) & 0xFF ) ); + TracyLfqCommit; } static tracy_force_inline void MessageAppInfo( const char* txt, size_t size ) { + assert( size < std::numeric_limits::max() ); InitRPMallocThread(); - auto ptr = (char*)tracy_malloc( size+1 ); + auto ptr = (char*)tracy_malloc( size ); memcpy( ptr, txt, size ); - ptr[size] = '\0'; TracyLfqPrepare( QueueType::MessageAppInfo ); - MemWrite( &item->message.time, GetTime() ); - MemWrite( &item->message.text, (uint64_t)ptr ); + MemWrite( &item->messageFat.time, GetTime() ); + MemWrite( &item->messageFat.text, (uint64_t)ptr ); + MemWrite( &item->messageFat.size, (uint16_t)size ); #ifdef TRACY_ON_DEMAND GetProfiler().DeferItem( *item ); @@ -348,8 +361,9 @@ public: TracyLfqCommit; } - static tracy_force_inline void MemAlloc( const void* ptr, size_t size ) + static tracy_force_inline void MemAlloc( const void* ptr, size_t size, bool secure ) { + if( secure && !ProfilerAvailable() ) return; #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif @@ -360,8 +374,9 @@ public: GetProfiler().m_serialLock.unlock(); } - 
static tracy_force_inline void MemFree( const void* ptr ) + static tracy_force_inline void MemFree( const void* ptr, bool secure ) { + if( secure && !ProfilerAvailable() ) return; #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif @@ -372,8 +387,9 @@ public: GetProfiler().m_serialLock.unlock(); } - static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth ) + static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth, bool secure ) { + if( secure && !ProfilerAvailable() ) return; #ifdef TRACY_HAS_CALLSTACK auto& profiler = GetProfiler(); # ifdef TRACY_ON_DEMAND @@ -385,16 +401,17 @@ public: auto callstack = Callstack( depth ); profiler.m_serialLock.lock(); - SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size ); SendCallstackMemory( callstack ); + SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size ); profiler.m_serialLock.unlock(); #else - MemAlloc( ptr, size ); + MemAlloc( ptr, size, secure ); #endif } - static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth ) + static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth, bool secure ) { + if( secure && !ProfilerAvailable() ) return; #ifdef TRACY_HAS_CALLSTACK auto& profiler = GetProfiler(); # ifdef TRACY_ON_DEMAND @@ -406,11 +423,85 @@ public: auto callstack = Callstack( depth ); profiler.m_serialLock.lock(); - SendMemFree( QueueType::MemFreeCallstack, thread, ptr ); SendCallstackMemory( callstack ); + SendMemFree( QueueType::MemFreeCallstack, thread, ptr ); profiler.m_serialLock.unlock(); #else - MemFree( ptr ); + MemFree( ptr, secure ); +#endif + } + + static tracy_force_inline void MemAllocNamed( const void* ptr, size_t size, bool secure, const char* name ) + { + if( secure && !ProfilerAvailable() ) return; +#ifdef TRACY_ON_DEMAND + if( !GetProfiler().IsConnected() ) return; +#endif + const auto thread = GetThreadHandle(); + + GetProfiler().m_serialLock.lock(); + 
SendMemName( name ); + SendMemAlloc( QueueType::MemAllocNamed, thread, ptr, size ); + GetProfiler().m_serialLock.unlock(); + } + + static tracy_force_inline void MemFreeNamed( const void* ptr, bool secure, const char* name ) + { + if( secure && !ProfilerAvailable() ) return; +#ifdef TRACY_ON_DEMAND + if( !GetProfiler().IsConnected() ) return; +#endif + const auto thread = GetThreadHandle(); + + GetProfiler().m_serialLock.lock(); + SendMemName( name ); + SendMemFree( QueueType::MemFreeNamed, thread, ptr ); + GetProfiler().m_serialLock.unlock(); + } + + static tracy_force_inline void MemAllocCallstackNamed( const void* ptr, size_t size, int depth, bool secure, const char* name ) + { + if( secure && !ProfilerAvailable() ) return; +#ifdef TRACY_HAS_CALLSTACK + auto& profiler = GetProfiler(); +# ifdef TRACY_ON_DEMAND + if( !profiler.IsConnected() ) return; +# endif + const auto thread = GetThreadHandle(); + + InitRPMallocThread(); + auto callstack = Callstack( depth ); + + profiler.m_serialLock.lock(); + SendCallstackMemory( callstack ); + SendMemName( name ); + SendMemAlloc( QueueType::MemAllocCallstackNamed, thread, ptr, size ); + profiler.m_serialLock.unlock(); +#else + MemAlloc( ptr, size, secure ); +#endif + } + + static tracy_force_inline void MemFreeCallstackNamed( const void* ptr, int depth, bool secure, const char* name ) + { + if( secure && !ProfilerAvailable() ) return; +#ifdef TRACY_HAS_CALLSTACK + auto& profiler = GetProfiler(); +# ifdef TRACY_ON_DEMAND + if( !profiler.IsConnected() ) return; +# endif + const auto thread = GetThreadHandle(); + + InitRPMallocThread(); + auto callstack = Callstack( depth ); + + profiler.m_serialLock.lock(); + SendCallstackMemory( callstack ); + SendMemName( name ); + SendMemFree( QueueType::MemFreeCallstackNamed, thread, ptr ); + profiler.m_serialLock.unlock(); +#else + MemFree( ptr, secure ); #endif } @@ -419,7 +510,7 @@ public: #ifdef TRACY_HAS_CALLSTACK auto ptr = Callstack( depth ); TracyLfqPrepare( QueueType::Callstack 
); - MemWrite( &item->callstack.ptr, (uint64_t)ptr ); + MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); TracyLfqCommit; #endif } @@ -445,12 +536,12 @@ public: static bool ShouldExit(); -#ifdef TRACY_ON_DEMAND tracy_force_inline bool IsConnected() const { return m_isConnected.load( std::memory_order_acquire ); } +#ifdef TRACY_ON_DEMAND tracy_force_inline uint64_t ConnectionId() const { return m_connectionId.load( std::memory_order_acquire ); @@ -468,11 +559,16 @@ public: void RequestShutdown() { m_shutdown.store( true, std::memory_order_relaxed ); m_shutdownManual.store( true, std::memory_order_relaxed ); } bool HasShutdownFinished() const { return m_shutdownFinished.load( std::memory_order_relaxed ); } - void SendString( uint64_t ptr, const char* str, QueueType type ); + void SendString( uint64_t str, const char* ptr, QueueType type ) { SendString( str, ptr, strlen( ptr ), type ); } + void SendString( uint64_t str, const char* ptr, size_t len, QueueType type ); + void SendSingleString( const char* ptr ) { SendSingleString( ptr, strlen( ptr ) ); } + void SendSingleString( const char* ptr, size_t len ); + void SendSecondString( const char* ptr ) { SendSecondString( ptr, strlen( ptr ) ); } + void SendSecondString( const char* ptr, size_t len ); // Allocated source location data layout: - // 4b payload size + // 2b payload size // 4b color // 4b source line // fsz function name @@ -483,30 +579,36 @@ public: static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function ) { - const auto fsz = strlen( function ); - const auto ssz = strlen( source ); - const uint32_t sz = uint32_t( 4 + 4 + 4 + fsz + 1 + ssz + 1 ); - auto ptr = (char*)tracy_malloc( sz ); - memcpy( ptr, &sz, 4 ); - memset( ptr + 4, 0, 4 ); - memcpy( ptr + 8, &line, 4 ); - memcpy( ptr + 12, function, fsz+1 ); - memcpy( ptr + 12 + fsz + 1, source, ssz + 1 ); - return uint64_t( ptr ); + return AllocSourceLocation( line, source, function, nullptr, 0 ); } 
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz ) { - const auto fsz = strlen( function ); - const auto ssz = strlen( source ); - const uint32_t sz = uint32_t( 4 + 4 + 4 + fsz + 1 + ssz + 1 + nameSz ); + return AllocSourceLocation( line, source, strlen(source), function, strlen(function), name, nameSz ); + } + + static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz ) + { + return AllocSourceLocation( line, source, sourceSz, function, functionSz, nullptr, 0 ); + } + + static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz ) + { + const auto sz32 = uint32_t( 2 + 4 + 4 + functionSz + 1 + sourceSz + 1 + nameSz ); + assert( sz32 <= std::numeric_limits::max() ); + const auto sz = uint16_t( sz32 ); auto ptr = (char*)tracy_malloc( sz ); - memcpy( ptr, &sz, 4 ); - memset( ptr + 4, 0, 4 ); - memcpy( ptr + 8, &line, 4 ); - memcpy( ptr + 12, function, fsz+1 ); - memcpy( ptr + 12 + fsz + 1, source, ssz + 1 ); - memcpy( ptr + 12 + fsz + 1 + ssz + 1, name, nameSz ); + memcpy( ptr, &sz, 2 ); + memset( ptr + 2, 0, 4 ); + memcpy( ptr + 6, &line, 4 ); + memcpy( ptr + 10, function, functionSz ); + ptr[10 + functionSz] = '\0'; + memcpy( ptr + 10 + functionSz + 1, source, sourceSz ); + ptr[10 + functionSz + 1 + sourceSz] = '\0'; + if( nameSz != 0 ) + { + memcpy( ptr + 10 + functionSz + 1 + sourceSz + 1, name, nameSz ); + } return uint64_t( ptr ); } @@ -575,14 +677,14 @@ private: #ifdef TRACY_HAS_CALLSTACK auto item = GetProfiler().m_serialQueue.prepare_next(); MemWrite( &item->hdr.type, QueueType::CallstackMemory ); - MemWrite( &item->callstackMemory.ptr, (uint64_t)ptr ); + MemWrite( &item->callstackFat.ptr, (uint64_t)ptr ); GetProfiler().m_serialQueue.commit_next(); #endif } 
static tracy_force_inline void SendMemAlloc( QueueType type, const uint64_t thread, const void* ptr, size_t size ) { - assert( type == QueueType::MemAlloc || type == QueueType::MemAllocCallstack ); + assert( type == QueueType::MemAlloc || type == QueueType::MemAllocCallstack || type == QueueType::MemAllocNamed || type == QueueType::MemAllocCallstackNamed ); auto item = GetProfiler().m_serialQueue.prepare_next(); MemWrite( &item->hdr.type, type ); @@ -605,7 +707,7 @@ private: static tracy_force_inline void SendMemFree( QueueType type, const uint64_t thread, const void* ptr ) { - assert( type == QueueType::MemFree || type == QueueType::MemFreeCallstack ); + assert( type == QueueType::MemFree || type == QueueType::MemFreeCallstack || type == QueueType::MemFreeNamed || type == QueueType::MemFreeCallstackNamed ); auto item = GetProfiler().m_serialQueue.prepare_next(); MemWrite( &item->hdr.type, type ); @@ -615,6 +717,15 @@ private: GetProfiler().m_serialQueue.commit_next(); } + static tracy_force_inline void SendMemName( const char* name ) + { + assert( name ); + auto item = GetProfiler().m_serialQueue.prepare_next(); + MemWrite( &item->hdr.type, QueueType::MemNamePayload ); + MemWrite( &item->memName.name, (uint64_t)name ); + GetProfiler().m_serialQueue.commit_next(); + } + #if ( defined _WIN32 || defined __CYGWIN__ ) && defined TRACY_TIMER_QPC static int64_t GetTimeQpc(); #endif @@ -655,8 +766,8 @@ private: TracyMutex m_fiLock; std::atomic m_frameCount; -#ifdef TRACY_ON_DEMAND std::atomic m_isConnected; +#ifdef TRACY_ON_DEMAND std::atomic m_connectionId; TracyMutex m_deferredLock; diff --git a/deps/tracy/client/TracyRingBuffer.hpp b/deps/tracy/client/TracyRingBuffer.hpp new file mode 100644 index 0000000..29d9355 --- /dev/null +++ b/deps/tracy/client/TracyRingBuffer.hpp @@ -0,0 +1,116 @@ +namespace tracy +{ + +template +class RingBuffer +{ +public: + RingBuffer( int fd ) + : m_fd( fd ) + { + const auto pageSize = uint32_t( getpagesize() ); + assert( Size >= pageSize 
); + assert( __builtin_popcount( Size ) == 1 ); + m_mapSize = Size + pageSize; + auto mapAddr = mmap( nullptr, m_mapSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 ); + if( !mapAddr ) + { + m_fd = 0; + close( fd ); + return; + } + m_metadata = (perf_event_mmap_page*)mapAddr; + assert( m_metadata->data_offset == pageSize ); + m_buffer = ((char*)mapAddr) + pageSize; + } + + ~RingBuffer() + { + if( m_metadata ) munmap( m_metadata, m_mapSize ); + if( m_fd ) close( m_fd ); + } + + RingBuffer( const RingBuffer& ) = delete; + RingBuffer& operator=( const RingBuffer& ) = delete; + + RingBuffer( RingBuffer&& other ) + { + memcpy( (char*)&other, (char*)this, sizeof( RingBuffer ) ); + m_metadata = nullptr; + m_fd = 0; + } + + RingBuffer& operator=( RingBuffer&& other ) + { + memcpy( (char*)&other, (char*)this, sizeof( RingBuffer ) ); + m_metadata = nullptr; + m_fd = 0; + return *this; + } + + bool IsValid() const { return m_metadata != nullptr; } + + void Enable() + { + ioctl( m_fd, PERF_EVENT_IOC_ENABLE, 0 ); + } + + bool HasData() const + { + const auto head = LoadHead(); + return head > m_metadata->data_tail; + } + + void Read( void* dst, uint64_t offset, uint64_t cnt ) + { + auto src = ( m_metadata->data_tail + offset ) % Size; + if( src + cnt <= Size ) + { + memcpy( dst, m_buffer + src, cnt ); + } + else + { + const auto s0 = Size - src; + memcpy( dst, m_buffer + src, s0 ); + memcpy( (char*)dst + s0, m_buffer, cnt - s0 ); + } + } + + void Advance( uint64_t cnt ) + { + StoreTail( m_metadata->data_tail + cnt ); + } + + bool CheckTscCaps() const + { + return m_metadata->cap_user_time_zero; + } + + int64_t ConvertTimeToTsc( int64_t timestamp ) const + { + assert( m_metadata->cap_user_time_zero ); + const auto time = timestamp - m_metadata->time_zero; + const auto quot = time / m_metadata->time_mult; + const auto rem = time % m_metadata->time_mult; + return ( quot << m_metadata->time_shift ) + ( rem << m_metadata->time_shift ) / m_metadata->time_mult; + } + +private: + 
uint64_t LoadHead() const + { + return std::atomic_load_explicit( (const volatile std::atomic*)&m_metadata->data_head, std::memory_order_acquire ); + } + + void StoreTail( uint64_t tail ) + { + std::atomic_store_explicit( (volatile std::atomic*)&m_metadata->data_tail, tail, std::memory_order_release ); + } + + perf_event_mmap_page* m_metadata; + char* m_buffer; + + size_t m_mapSize; + int m_fd; +}; + +} diff --git a/deps/tracy/client/TracyScoped.hpp b/deps/tracy/client/TracyScoped.hpp index 57c7d56..78005e6 100644 --- a/deps/tracy/client/TracyScoped.hpp +++ b/deps/tracy/client/TracyScoped.hpp @@ -1,6 +1,7 @@ #ifndef __TRACYSCOPED_HPP__ #define __TRACYSCOPED_HPP__ +#include #include #include @@ -15,6 +16,11 @@ namespace tracy class ScopedZone { public: + ScopedZone( const ScopedZone& ) = delete; + ScopedZone( ScopedZone&& ) = delete; + ScopedZone& operator=( const ScopedZone& ) = delete; + ScopedZone& operator=( ScopedZone&& ) = delete; + tracy_force_inline ScopedZone( const SourceLocationData* srcloc, bool is_active = true ) #ifdef TRACY_ON_DEMAND : m_active( is_active && GetProfiler().IsConnected() ) @@ -43,12 +49,50 @@ public: #ifdef TRACY_ON_DEMAND m_connectionId = GetProfiler().ConnectionId(); #endif + GetProfiler().SendCallstack( depth ); + TracyLfqPrepare( QueueType::ZoneBeginCallstack ); MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); TracyLfqCommit; + } + tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active = true ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( !m_active ) return; +#ifdef TRACY_ON_DEMAND + m_connectionId = GetProfiler().ConnectionId(); +#endif + TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLoc ); + const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, 
function, functionSz, name, nameSz ); + MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); + MemWrite( &item->zoneBegin.srcloc, srcloc ); + TracyLfqCommit; + } + + tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active = true ) +#ifdef TRACY_ON_DEMAND + : m_active( is_active && GetProfiler().IsConnected() ) +#else + : m_active( is_active ) +#endif + { + if( !m_active ) return; +#ifdef TRACY_ON_DEMAND + m_connectionId = GetProfiler().ConnectionId(); +#endif GetProfiler().SendCallstack( depth ); + + TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLocCallstack ); + const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz ); + MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); + MemWrite( &item->zoneBegin.srcloc, srcloc ); + TracyLfqCommit; } tracy_force_inline ~ScopedZone() @@ -64,29 +108,31 @@ public: tracy_force_inline void Text( const char* txt, size_t size ) { + assert( size < std::numeric_limits::max() ); if( !m_active ) return; #ifdef TRACY_ON_DEMAND if( GetProfiler().ConnectionId() != m_connectionId ) return; #endif - auto ptr = (char*)tracy_malloc( size+1 ); + auto ptr = (char*)tracy_malloc( size ); memcpy( ptr, txt, size ); - ptr[size] = '\0'; TracyLfqPrepare( QueueType::ZoneText ); - MemWrite( &item->zoneText.text, (uint64_t)ptr ); + MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); + MemWrite( &item->zoneTextFat.size, (uint16_t)size ); TracyLfqCommit; } tracy_force_inline void Name( const char* txt, size_t size ) { + assert( size < std::numeric_limits::max() ); if( !m_active ) return; #ifdef TRACY_ON_DEMAND if( GetProfiler().ConnectionId() != m_connectionId ) return; #endif - auto ptr = (char*)tracy_malloc( size+1 ); + auto ptr = (char*)tracy_malloc( size ); memcpy( ptr, txt, size ); - ptr[size] = '\0'; TracyLfqPrepare( QueueType::ZoneName ); - MemWrite( 
&item->zoneText.text, (uint64_t)ptr ); + MemWrite( &item->zoneTextFat.text, (uint64_t)ptr ); + MemWrite( &item->zoneTextFat.size, (uint16_t)size ); TracyLfqCommit; } diff --git a/deps/tracy/client/TracySysTrace.cpp b/deps/tracy/client/TracySysTrace.cpp index 076332c..4a0df99 100644 --- a/deps/tracy/client/TracySysTrace.cpp +++ b/deps/tracy/client/TracySysTrace.cpp @@ -21,23 +21,28 @@ # include "../common/TracyAlloc.hpp" # include "../common/TracySystem.hpp" # include "TracyProfiler.hpp" +# include "TracyThread.hpp" namespace tracy { -DEFINE_GUID ( /* ce1dbfb4-137e-4da6-87b0-3f59aa102cbc */ - PerfInfoGuid, - 0xce1dbfb4, - 0x137e, - 0x4da6, - 0x87, 0xb0, 0x3f, 0x59, 0xaa, 0x10, 0x2c, 0xbc -); +struct __declspec(uuid("{ce1dbfb4-137e-4da6-87b0-3f59aa102cbc}")) PERFINFOGUID; +static const auto PerfInfoGuid = __uuidof(PERFINFOGUID); + +struct __declspec(uuid("{802EC45A-1E99-4B83-9920-87C98277BA9D}")) DXGKRNLGUID; +static const auto DxgKrnlGuid = __uuidof(DXGKRNLGUID); + static TRACEHANDLE s_traceHandle; static TRACEHANDLE s_traceHandle2; static EVENT_TRACE_PROPERTIES* s_prop; static DWORD s_pid; +static EVENT_TRACE_PROPERTIES* s_propVsync; +static TRACEHANDLE s_traceHandleVsync; +static TRACEHANDLE s_traceHandleVsync2; +Thread* s_threadVsync = nullptr; + struct CSwitch { uint32_t newThreadId; @@ -85,6 +90,19 @@ struct StackWalkEvent uint64_t stack[192]; }; +struct VSyncInfo +{ + void* dxgAdapter; + uint32_t vidPnTargetId; + uint64_t scannedPhysicalAddress; + uint32_t vidPnSourceId; + uint32_t frameNumber; + int64_t frameQpcTime; + void* hFlipDevice; + uint32_t flipType; + uint64_t flipFenceId; +}; + #ifdef __CYGWIN__ extern "C" typedef DWORD (WINAPI *t_GetProcessIdOfThread)( HANDLE ); extern "C" typedef DWORD (WINAPI *t_GetProcessImageFileNameA)( HANDLE, LPSTR, DWORD ); @@ -168,9 +186,9 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record ) memcpy( trace, &sz, sizeof( uint64_t ) ); memcpy( trace+1, sw->stack, sizeof( uint64_t ) * sz ); TracyLfqPrepare( 
QueueType::CallstackSample ); - MemWrite( &item->callstackSample.time, sw->eventTimeStamp ); - MemWrite( &item->callstackSample.thread, (uint64_t)sw->stackThread ); - MemWrite( &item->callstackSample.ptr, (uint64_t)trace ); + MemWrite( &item->callstackSampleFat.time, sw->eventTimeStamp ); + MemWrite( &item->callstackSampleFat.thread, (uint64_t)sw->stackThread ); + MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace ); TracyLfqCommit; } } @@ -181,6 +199,136 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record ) } } +static constexpr const char* VsyncName[] = { + "[0] Vsync", + "[1] Vsync", + "[2] Vsync", + "[3] Vsync", + "[4] Vsync", + "[5] Vsync", + "[6] Vsync", + "[7] Vsync", + "Vsync" +}; + +static uint32_t VsyncTarget[8] = {}; + +void WINAPI EventRecordCallbackVsync( PEVENT_RECORD record ) +{ +#ifdef TRACY_ON_DEMAND + if( !GetProfiler().IsConnected() ) return; +#endif + + const auto& hdr = record->EventHeader; + assert( hdr.ProviderId.Data1 == 0x802EC45A ); + assert( hdr.EventDescriptor.Id == 0x0011 ); + + const auto vs = (const VSyncInfo*)record->UserData; + + int idx = 0; + do + { + if( VsyncTarget[idx] == 0 ) + { + VsyncTarget[idx] = vs->vidPnTargetId; + break; + } + else if( VsyncTarget[idx] == vs->vidPnTargetId ) + { + break; + } + } + while( ++idx < 8 ); + + TracyLfqPrepare( QueueType::FrameMarkMsg ); + MemWrite( &item->frameMark.time, hdr.TimeStamp.QuadPart ); + MemWrite( &item->frameMark.name, uint64_t( VsyncName[idx] ) ); + TracyLfqCommit; +} + +static void SetupVsync() +{ +#if _WIN32_WINNT >= _WIN32_WINNT_WINBLUE + const auto psz = sizeof( EVENT_TRACE_PROPERTIES ) + MAX_PATH; + s_propVsync = (EVENT_TRACE_PROPERTIES*)tracy_malloc( psz ); + memset( s_propVsync, 0, sizeof( EVENT_TRACE_PROPERTIES ) ); + s_propVsync->LogFileMode = EVENT_TRACE_REAL_TIME_MODE; + s_propVsync->Wnode.BufferSize = psz; +#ifdef TRACY_TIMER_QPC + s_propVsync->Wnode.ClientContext = 1; +#else + s_propVsync->Wnode.ClientContext = 3; +#endif + s_propVsync->LoggerNameOffset = 
sizeof( EVENT_TRACE_PROPERTIES ); + strcpy( ((char*)s_propVsync) + sizeof( EVENT_TRACE_PROPERTIES ), "TracyVsync" ); + + auto backup = tracy_malloc( psz ); + memcpy( backup, s_propVsync, psz ); + + const auto controlStatus = ControlTraceA( 0, "TracyVsync", s_propVsync, EVENT_TRACE_CONTROL_STOP ); + if( controlStatus != ERROR_SUCCESS && controlStatus != ERROR_WMI_INSTANCE_NOT_FOUND ) + { + tracy_free( backup ); + tracy_free( s_propVsync ); + return; + } + + memcpy( s_propVsync, backup, psz ); + tracy_free( backup ); + + const auto startStatus = StartTraceA( &s_traceHandleVsync, "TracyVsync", s_propVsync ); + if( startStatus != ERROR_SUCCESS ) + { + tracy_free( s_propVsync ); + return; + } + + EVENT_FILTER_EVENT_ID fe = {}; + fe.FilterIn = TRUE; + fe.Count = 1; + fe.Events[0] = 0x0011; // VSyncDPC_Info + + EVENT_FILTER_DESCRIPTOR desc = {}; + desc.Ptr = (ULONGLONG)&fe; + desc.Size = sizeof( fe ); + desc.Type = EVENT_FILTER_TYPE_EVENT_ID; + + ENABLE_TRACE_PARAMETERS params = {}; + params.Version = ENABLE_TRACE_PARAMETERS_VERSION_2; + params.EnableProperty = EVENT_ENABLE_PROPERTY_IGNORE_KEYWORD_0; + params.SourceId = s_propVsync->Wnode.Guid; + params.EnableFilterDesc = &desc; + params.FilterDescCount = 1; + + uint64_t mask = 0x4000000000000001; // Microsoft_Windows_DxgKrnl_Performance | Base + EnableTraceEx2( s_traceHandleVsync, &DxgKrnlGuid, EVENT_CONTROL_CODE_ENABLE_PROVIDER, TRACE_LEVEL_INFORMATION, mask, mask, 0, ¶ms ); + + char loggerName[MAX_PATH]; + strcpy( loggerName, "TracyVsync" ); + + EVENT_TRACE_LOGFILEA log = {}; + log.LoggerName = loggerName; + log.ProcessTraceMode = PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_EVENT_RECORD | PROCESS_TRACE_MODE_RAW_TIMESTAMP; + log.EventRecordCallback = EventRecordCallbackVsync; + + s_traceHandleVsync2 = OpenTraceA( &log ); + if( s_traceHandleVsync2 == (TRACEHANDLE)INVALID_HANDLE_VALUE ) + { + CloseTrace( s_traceHandleVsync ); + tracy_free( s_propVsync ); + return; + } + + s_threadVsync = (Thread*)tracy_malloc( 
sizeof( Thread ) ); + new(s_threadVsync) Thread( [] (void*) { + ThreadExitHandler threadExitHandler; + SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL ); + SetThreadName( "Tracy Vsync" ); + ProcessTrace( &s_traceHandleVsync2, 1, nullptr, nullptr ); + }, nullptr ); +#endif +} + bool SysTraceStart( int64_t& samplingPeriod ) { if( !_GetThreadDescription ) _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" ); @@ -233,6 +381,8 @@ bool SysTraceStart( int64_t& samplingPeriod ) #endif s_prop->Wnode.Guid = SystemTraceControlGuid; s_prop->BufferSize = 1024; + s_prop->MinimumBuffers = std::thread::hardware_concurrency() * 4; + s_prop->MaximumBuffers = std::thread::hardware_concurrency() * 6; s_prop->LoggerNameOffset = sizeof( EVENT_TRACE_PROPERTIES ); memcpy( ((char*)s_prop) + sizeof( EVENT_TRACE_PROPERTIES ), KERNEL_LOGGER_NAME, sizeof( KERNEL_LOGGER_NAME ) ); @@ -242,6 +392,7 @@ bool SysTraceStart( int64_t& samplingPeriod ) const auto controlStatus = ControlTrace( 0, KERNEL_LOGGER_NAME, s_prop, EVENT_TRACE_CONTROL_STOP ); if( controlStatus != ERROR_SUCCESS && controlStatus != ERROR_WMI_INSTANCE_NOT_FOUND ) { + tracy_free( backup ); tracy_free( s_prop ); return false; } @@ -288,17 +439,29 @@ bool SysTraceStart( int64_t& samplingPeriod ) return false; } + SetupVsync(); + return true; } void SysTraceStop() { + if( s_threadVsync ) + { + CloseTrace( s_traceHandleVsync2 ); + CloseTrace( s_traceHandleVsync ); + s_threadVsync->~Thread(); + tracy_free( s_threadVsync ); + } + CloseTrace( s_traceHandle2 ); CloseTrace( s_traceHandle ); } void SysTraceWorker( void* ptr ) { + ThreadExitHandler threadExitHandler; + SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL ); SetThreadName( "Tracy SysTrace" ); ProcessTrace( &s_traceHandle2, 1, 0, 0 ); ControlTrace( 0, KERNEL_LOGGER_NAME, s_prop, EVENT_TRACE_CONTROL_STOP ); @@ -323,7 +486,7 @@ void SysTraceSendExternalName( uint64_t 
thread ) auto ret = wcstombs( buf, tmp, 256 ); if( ret != 0 ) { - GetProfiler().SendString( thread, buf, QueueType::ExternalThreadName ); + GetProfiler().SendString( thread, buf, ret, QueueType::ExternalThreadName ); threadSent = true; } } @@ -351,9 +514,10 @@ void SysTraceSendExternalName( uint64_t thread ) if( (uint64_t)ptr >= (uint64_t)info.lpBaseOfDll && (uint64_t)ptr <= (uint64_t)info.lpBaseOfDll + (uint64_t)info.SizeOfImage ) { char buf2[1024]; - if( _GetModuleBaseNameA( phnd, modules[i], buf2, 1024 ) != 0 ) + const auto modlen = _GetModuleBaseNameA( phnd, modules[i], buf2, 1024 ); + if( modlen != 0 ) { - GetProfiler().SendString( thread, buf2, QueueType::ExternalThreadName ); + GetProfiler().SendString( thread, buf2, modlen, QueueType::ExternalThreadName ); threadSent = true; } } @@ -367,7 +531,7 @@ void SysTraceSendExternalName( uint64_t thread ) CloseHandle( hnd ); if( !threadSent ) { - GetProfiler().SendString( thread, "???", QueueType::ExternalThreadName ); + GetProfiler().SendString( thread, "???", 3, QueueType::ExternalThreadName ); threadSent = true; } if( pid != 0 ) @@ -381,7 +545,7 @@ void SysTraceSendExternalName( uint64_t thread ) } if( pid == 4 ) { - GetProfiler().SendString( thread, "System", QueueType::ExternalName ); + GetProfiler().SendString( thread, "System", 6, QueueType::ExternalName ); return; } else @@ -407,9 +571,9 @@ void SysTraceSendExternalName( uint64_t thread ) if( !threadSent ) { - GetProfiler().SendString( thread, "???", QueueType::ExternalThreadName ); + GetProfiler().SendString( thread, "???", 3, QueueType::ExternalThreadName ); } - GetProfiler().SendString( thread, "???", QueueType::ExternalName ); + GetProfiler().SendString( thread, "???", 3, QueueType::ExternalName ); } } @@ -428,8 +592,15 @@ void SysTraceSendExternalName( uint64_t thread ) # include # include # include +# include +# include +# include +# include +# include # include "TracyProfiler.hpp" +# include "TracyRingBuffer.hpp" +# include "TracyThread.hpp" # ifdef 
__ANDROID__ # include "TracySysTracePayload.hpp" @@ -449,6 +620,173 @@ static const char BufferSizeKb[] = "buffer_size_kb"; static const char TracePipe[] = "trace_pipe"; static std::atomic traceActive { false }; +static Thread* s_threadSampling = nullptr; +static int s_numCpus = 0; + +static constexpr size_t RingBufSize = 64*1024; +static RingBuffer* s_ring = nullptr; + +static int perf_event_open( struct perf_event_attr* hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags ) +{ + return syscall( __NR_perf_event_open, hw_event, pid, cpu, group_fd, flags ); +} + +static void SetupSampling( int64_t& samplingPeriod ) +{ +#ifndef CLOCK_MONOTONIC_RAW + return; +#endif + + samplingPeriod = 100*1000; + + s_numCpus = (int)std::thread::hardware_concurrency(); + s_ring = (RingBuffer*)tracy_malloc( sizeof( RingBuffer ) * s_numCpus ); + + perf_event_attr pe = {}; + + pe.type = PERF_TYPE_SOFTWARE; + pe.size = sizeof( perf_event_attr ); + pe.config = PERF_COUNT_SW_CPU_CLOCK; + + pe.sample_freq = 10000; + pe.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CALLCHAIN; +#if LINUX_VERSION_CODE >= KERNEL_VERSION( 4, 8, 0 ) + pe.sample_max_stack = 127; +#endif + pe.exclude_callchain_kernel = 1; + + pe.disabled = 1; + pe.freq = 1; +#if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) + pe.use_clockid = 1; + pe.clockid = CLOCK_MONOTONIC_RAW; +#endif + + for( int i=0; i(); + tracy_free( s_ring ); + return; + } + new( s_ring+i ) RingBuffer( fd ); + } + + s_threadSampling = (Thread*)tracy_malloc( sizeof( Thread ) ); + new(s_threadSampling) Thread( [] (void*) { + ThreadExitHandler threadExitHandler; + SetThreadName( "Tracy Sampling" ); + sched_param sp = { 5 }; + pthread_setschedparam( pthread_self(), SCHED_FIFO, &sp ); + uint32_t currentPid = (uint32_t)getpid(); +#if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) + for( int i=0; i(); + tracy_free( 
s_ring ); + const char* err = "Tracy Profiler: sampling is disabled due to non-native scheduler clock. Are you running under a VM?"; + Profiler::MessageAppInfo( err, strlen( err ) ); + return; + } + } +#endif + for( int i=0; i> 63; + const auto m2 = test >> 47; + if( m1 == m2 ) break; + } + while( --cnt > 0 ); + for( uint64_t j=1; j> 63; + const auto m2 = test >> 47; + if( m1 != m2 ) trace[j] = 0; + } + + // skip kernel frames + uint64_t j; + for( j=0; j= 0 ) break; + } + if( j == cnt ) + { + tracy_free( trace ); + } + else + { + if( j > 0 ) + { + cnt -= j; + memmove( trace+1, trace+1+j, sizeof( uint64_t ) * cnt ); + } + memcpy( trace, &cnt, sizeof( uint64_t ) ); + +#if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) + t0 = s_ring[i].ConvertTimeToTsc( t0 ); +#endif + + TracyLfqPrepare( QueueType::CallstackSample ); + MemWrite( &item->callstackSampleFat.time, t0 ); + MemWrite( &item->callstackSampleFat.thread, (uint64_t)tid ); + MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace ); + TracyLfqCommit; + } + } + } + s_ring[i].Advance( hdr.size ); + } + if( !hadData ) + { + std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); + } + } + + for( int i=0; i(); + tracy_free( s_ring ); + }, nullptr ); +} #ifdef __ANDROID__ static bool TraceWrite( const char* path, size_t psz, const char* val, size_t vsz ) @@ -525,6 +863,10 @@ void SysTraceInjectPayload() bool SysTraceStart( int64_t& samplingPeriod ) { +#ifndef CLOCK_MONOTONIC_RAW + return false; +#endif + if( !TraceWrite( TracingOn, sizeof( TracingOn ), "0", 2 ) ) return false; if( !TraceWrite( CurrentTracer, sizeof( CurrentTracer ), "nop", 4 ) ) return false; TraceWrite( TraceOptions, sizeof( TraceOptions ), "norecord-cmd", 13 ); @@ -533,12 +875,12 @@ bool SysTraceStart( int64_t& samplingPeriod ) TraceWrite( TraceOptions, sizeof( TraceOptions ), "noannotate", 11 ); #if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ 
|| defined _M_X64 ) if( !TraceWrite( TraceClock, sizeof( TraceClock ), "x86-tsc", 8 ) ) return false; -#elif __ARM_ARCH >= 6 +#else if( !TraceWrite( TraceClock, sizeof( TraceClock ), "mono_raw", 9 ) ) return false; #endif if( !TraceWrite( SchedSwitch, sizeof( SchedSwitch ), "1", 2 ) ) return false; if( !TraceWrite( SchedWakeup, sizeof( SchedWakeup ), "1", 2 ) ) return false; - if( !TraceWrite( BufferSizeKb, sizeof( BufferSizeKb ), "512", 4 ) ) return false; + if( !TraceWrite( BufferSizeKb, sizeof( BufferSizeKb ), "4096", 5 ) ) return false; #if defined __ANDROID__ && ( defined __aarch64__ || defined __ARM_ARCH ) SysTraceInjectPayload(); @@ -547,6 +889,8 @@ bool SysTraceStart( int64_t& samplingPeriod ) if( !TraceWrite( TracingOn, sizeof( TracingOn ), "1", 2 ) ) return false; traceActive.store( true, std::memory_order_relaxed ); + SetupSampling( samplingPeriod ); + return true; } @@ -554,23 +898,27 @@ void SysTraceStop() { TraceWrite( TracingOn, sizeof( TracingOn ), "0", 2 ); traceActive.store( false, std::memory_order_relaxed ); + if( s_threadSampling ) + { + s_threadSampling->~Thread(); + tracy_free( s_threadSampling ); + } } -static uint64_t ReadNumber( const char*& ptr ) +static uint64_t ReadNumber( const char*& data ) { - uint64_t val = 0; + auto ptr = data; + assert( *ptr >= '0' && *ptr <= '9' ); + uint64_t val = *ptr++ - '0'; for(;;) { - if( *ptr >= '0' && *ptr <= '9' ) - { - val = val * 10 + ( *ptr - '0' ); - ptr++; - } - else - { - return val; - } + const uint8_t v = uint8_t( *ptr - '0' ); + if( v > 9 ) break; + val = val * 10 + v; + ptr++; } + data = ptr; + return val; } static uint8_t ReadState( char state ) @@ -674,7 +1022,7 @@ static void HandleTraceLine( const char* line ) #if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) const auto time = ReadNumber( line ); -#elif __ARM_ARCH >= 6 +#else const auto ts = ReadNumber( line ); line++; // '.' 
const auto tus = ReadNumber( line ); @@ -774,19 +1122,16 @@ static void ProcessTraceLines( int fd ) line = buf; for(;;) { - auto next = line; - while( next < end && *next != '\n' ) next++; - next++; - if( next >= end ) + auto next = (char*)memchr( line, '\n', end - line ); + if( !next ) { const auto lsz = end - line; memmove( buf, line, lsz ); line = buf + lsz; break; } - HandleTraceLine( line ); - line = next; + line = ++next; } if( rd < 64*1024 ) { @@ -799,6 +1144,7 @@ static void ProcessTraceLines( int fd ) void SysTraceWorker( void* ptr ) { + ThreadExitHandler threadExitHandler; SetThreadName( "Tracy SysTrace" ); int pipefd[2]; if( pipe( pipefd ) == 0 ) @@ -812,6 +1158,8 @@ void SysTraceWorker( void* ptr ) if( dup2( pipefd[1], STDOUT_FILENO ) >= 0 ) { close( pipefd[1] ); + sched_param sp = { 4 }; + pthread_setschedparam( pthread_self(), SCHED_FIFO, &sp ); #if defined __ANDROID__ && ( defined __aarch64__ || defined __ARM_ARCH ) execlp( "su", "su", "-c", "/data/tracy_systrace", (char*)nullptr ); #endif @@ -823,6 +1171,8 @@ void SysTraceWorker( void* ptr ) { // parent close( pipefd[1] ); + sched_param sp = { 5 }; + pthread_setschedparam( pthread_self(), SCHED_FIFO, &sp ); ProcessTraceLines( pipefd[0] ); close( pipefd[0] ); } @@ -856,14 +1206,10 @@ static void ProcessTraceLines( int fd ) const auto end = buf + rd; for(;;) { - auto next = line; - while( next < end && *next != '\n' ) next++; - if( next == end ) break; - assert( *next == '\n' ); - next++; - + auto next = (char*)memchr( line, '\n', end - line ); + if( !next ) break; HandleTraceLine( line ); - line = next; + line = ++next; } } @@ -872,6 +1218,7 @@ static void ProcessTraceLines( int fd ) void SysTraceWorker( void* ptr ) { + ThreadExitHandler threadExitHandler; SetThreadName( "Tracy SysTrace" ); char tmp[256]; memcpy( tmp, BasePath, sizeof( BasePath ) - 1 ); @@ -879,6 +1226,8 @@ void SysTraceWorker( void* ptr ) int fd = open( tmp, O_RDONLY ); if( fd < 0 ) return; + sched_param sp = { 5 }; + 
pthread_setschedparam( pthread_self(), SCHED_FIFO, &sp ); ProcessTraceLines( fd ); close( fd ); } @@ -900,7 +1249,7 @@ void SysTraceSendExternalName( uint64_t thread ) } else { - GetProfiler().SendString( thread, "???", QueueType::ExternalThreadName ); + GetProfiler().SendString( thread, "???", 3, QueueType::ExternalThreadName ); } sprintf( fn, "/proc/%" PRIu64 "/status", thread ); @@ -909,7 +1258,7 @@ void SysTraceSendExternalName( uint64_t thread ) { int pid = -1; size_t lsz = 1024; - auto line = (char*)malloc( lsz ); + auto line = (char*)tracy_malloc( lsz ); for(;;) { auto rd = getline( &line, &lsz, f ); @@ -920,7 +1269,7 @@ void SysTraceSendExternalName( uint64_t thread ) break; } } - free( line ); + tracy_free( line ); fclose( f ); if( pid >= 0 ) { @@ -944,7 +1293,7 @@ void SysTraceSendExternalName( uint64_t thread ) } } } - GetProfiler().SendString( thread, "???", QueueType::ExternalName ); + GetProfiler().SendString( thread, "???", 3, QueueType::ExternalName ); } } diff --git a/deps/tracy/client/TracyThread.hpp b/deps/tracy/client/TracyThread.hpp index 7fca497..edd255e 100644 --- a/deps/tracy/client/TracyThread.hpp +++ b/deps/tracy/client/TracyThread.hpp @@ -7,9 +7,24 @@ # include #endif +#ifdef TRACY_MANUAL_LIFETIME +# include "tracy_rpmalloc.hpp" +#endif + namespace tracy { +class ThreadExitHandler +{ +public: + ~ThreadExitHandler() + { +#ifdef TRACY_MANUAL_LIFETIME + rpmalloc_thread_finalize(); +#endif + } +}; + #if defined _WIN32 || defined __CYGWIN__ class Thread diff --git a/deps/tracy/client/tracy_concurrentqueue.h b/deps/tracy/client/tracy_concurrentqueue.h index 98c6d28..8d737b6 100644 --- a/deps/tracy/client/tracy_concurrentqueue.h +++ b/deps/tracy/client/tracy_concurrentqueue.h @@ -62,24 +62,6 @@ namespace tracy { -// Exceptions -#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED -#if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__)) -#define MOODYCAMEL_EXCEPTIONS_ENABLED 
-#endif -#endif -#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED -#define MOODYCAMEL_TRY try -#define MOODYCAMEL_CATCH(...) catch(__VA_ARGS__) -#define MOODYCAMEL_RETHROW throw -#define MOODYCAMEL_THROW(expr) throw (expr) -#else -#define MOODYCAMEL_TRY if (true) -#define MOODYCAMEL_CATCH(...) else if (false) -#define MOODYCAMEL_RETHROW -#define MOODYCAMEL_THROW(expr) -#endif - #ifndef MOODYCAMEL_NOEXCEPT #if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED) #define MOODYCAMEL_NOEXCEPT diff --git a/deps/tracy/client/tracy_rpmalloc.cpp b/deps/tracy/client/tracy_rpmalloc.cpp index 552543e..62c0b1a 100644 --- a/deps/tracy/client/tracy_rpmalloc.cpp +++ b/deps/tracy/client/tracy_rpmalloc.cpp @@ -1372,7 +1372,7 @@ _memory_allocate_heap(void) { heap = (heap_t*)_memory_map((1 + (sizeof(heap_t) >> _memory_page_size_shift)) * _memory_page_size, &align_offset); if (!heap) return heap; - memset(heap, 0, sizeof(heap_t)); + memset((char*)heap, 0, sizeof(heap_t)); heap->align_offset = align_offset; //Get a new heap ID diff --git a/deps/tracy/common/TracyProtocol.hpp b/deps/tracy/common/TracyProtocol.hpp index 2160811..a24d05d 100644 --- a/deps/tracy/common/TracyProtocol.hpp +++ b/deps/tracy/common/TracyProtocol.hpp @@ -9,8 +9,8 @@ namespace tracy constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; } -enum : uint32_t { ProtocolVersion = 35 }; -enum : uint32_t { BroadcastVersion = 1 }; +enum : uint32_t { ProtocolVersion = 42 }; +enum : uint16_t { BroadcastVersion = 2 }; using lz4sz_t = uint32_t; @@ -87,6 +87,7 @@ struct WelcomeMessage uint8_t onDemand; uint8_t isApple; uint8_t cpuArch; + uint8_t codeTransfer; char cpuManufacturer[12]; uint32_t cpuId; char programName[WelcomeMessageProgramNameSize]; @@ -107,10 +108,10 @@ enum { OnDemandPayloadMessageSize = sizeof( OnDemandPayloadMessage ) }; struct BroadcastMessage { - uint32_t broadcastVersion; + uint16_t broadcastVersion; + uint16_t listenPort; uint32_t protocolVersion; - uint32_t listenPort; - uint32_t 
activeTime; // in seconds + int32_t activeTime; // in seconds char programName[WelcomeMessageProgramNameSize]; }; diff --git a/deps/tracy/common/TracyQueue.hpp b/deps/tracy/common/TracyQueue.hpp index 4bd48b5..5063cf5 100644 --- a/deps/tracy/common/TracyQueue.hpp +++ b/deps/tracy/common/TracyQueue.hpp @@ -16,19 +16,12 @@ enum class QueueType : uint8_t MessageColorCallstack, MessageAppInfo, ZoneBeginAllocSrcLoc, - ZoneBeginAllocSrcLocLean, ZoneBeginAllocSrcLocCallstack, - ZoneBeginAllocSrcLocCallstackLean, CallstackMemory, - CallstackMemoryLean, Callstack, - CallstackLean, CallstackAlloc, - CallstackAllocLean, CallstackSample, - CallstackSampleLean, FrameImage, - FrameImageLean, ZoneBegin, ZoneBeginCallstack, ZoneEnd, @@ -40,9 +33,13 @@ enum class QueueType : uint8_t LockSharedRelease, LockName, MemAlloc, + MemAllocNamed, MemFree, + MemFreeNamed, MemAllocCallstack, + MemAllocCallstackNamed, MemFreeCallstack, + MemFreeCallstackNamed, GpuZoneBegin, GpuZoneBeginCallstack, GpuZoneEnd, @@ -56,6 +53,7 @@ enum class QueueType : uint8_t Terminate, KeepAlive, ThreadContext, + GpuCalibration, Crash, CrashReport, ZoneValidation, @@ -82,9 +80,11 @@ enum class QueueType : uint8_t ParamSetup, ParamPingback, CpuTopology, + SingleStringData, + SecondStringData, + MemNamePayload, StringData, ThreadName, - CustomStringData, PlotName, SourceLocationPayload, CallstackPayload, @@ -140,15 +140,15 @@ struct QueueFrameMark uint64_t name; // ptr }; -struct QueueFrameImageLean +struct QueueFrameImage { - uint64_t frame; + uint32_t frame; uint16_t w; uint16_t h; uint8_t flip; }; -struct QueueFrameImage : public QueueFrameImageLean +struct QueueFrameImageFat : public QueueFrameImage { uint64_t image; // ptr }; @@ -164,9 +164,10 @@ struct QueueSourceLocation uint8_t b; }; -struct QueueZoneText +struct QueueZoneTextFat { uint64_t text; // ptr + uint16_t size; }; enum class LockType : uint8_t @@ -187,7 +188,6 @@ struct QueueLockTerminate { uint32_t id; int64_t time; - LockType type; }; struct 
QueueLockWait @@ -195,7 +195,6 @@ struct QueueLockWait uint64_t thread; uint32_t id; int64_t time; - LockType type; }; struct QueueLockObtain @@ -222,7 +221,12 @@ struct QueueLockMark struct QueueLockName { uint32_t id; +}; + +struct QueueLockNameFat : public QueueLockName +{ uint64_t name; // ptr + uint16_t size; }; enum class PlotDataType : uint8_t @@ -248,7 +252,6 @@ struct QueuePlotData struct QueueMessage { int64_t time; - uint64_t text; // ptr }; struct QueueMessageColor : public QueueMessage @@ -258,6 +261,28 @@ struct QueueMessageColor : public QueueMessage uint8_t b; }; +struct QueueMessageLiteral : public QueueMessage +{ + uint64_t text; // ptr +}; + +struct QueueMessageColorLiteral : public QueueMessageColor +{ + uint64_t text; // ptr +}; + +struct QueueMessageFat : public QueueMessage +{ + uint64_t text; // ptr + uint16_t size; +}; + +struct QueueMessageColorFat : public QueueMessageColor +{ + uint64_t text; // ptr + uint16_t size; +}; + // Don't change order, only add new entries at the end, this is also used on trace dumps! 
enum class GpuContextType : uint8_t { @@ -268,6 +293,11 @@ enum class GpuContextType : uint8_t Direct3D12 }; +enum GpuContextFlags : uint8_t +{ + GpuContextCalibration = 1 << 0 +}; + struct QueueGpuNewContext { int64_t cpuTime; @@ -275,7 +305,7 @@ struct QueueGpuNewContext uint64_t thread; float period; uint8_t context; - uint8_t accuracyBits; + GpuContextFlags flags; GpuContextType type; }; @@ -303,6 +333,19 @@ struct QueueGpuTime uint8_t context; }; +struct QueueGpuCalibration +{ + int64_t gpuTime; + int64_t cpuTime; + int64_t cpuDelta; + uint8_t context; +}; + +struct QueueMemNamePayload +{ + uint64_t name; +}; + struct QueueMemAlloc { int64_t time; @@ -318,29 +361,24 @@ struct QueueMemFree uint64_t ptr; }; -struct QueueCallstackMemory +struct QueueCallstackFat { uint64_t ptr; }; -struct QueueCallstack -{ - uint64_t ptr; -}; - -struct QueueCallstackAlloc +struct QueueCallstackAllocFat { uint64_t ptr; uint64_t nativePtr; }; -struct QueueCallstackSampleLean +struct QueueCallstackSample { int64_t time; uint64_t thread; }; -struct QueueCallstackSample : public QueueCallstackSampleLean +struct QueueCallstackSampleFat : public QueueCallstackSample { uint64_t ptr; }; @@ -349,21 +387,17 @@ struct QueueCallstackFrameSize { uint64_t ptr; uint8_t size; - uint64_t imageName; }; struct QueueCallstackFrame { - uint64_t name; - uint64_t file; uint32_t line; uint64_t symAddr; - char symLen[3]; + uint32_t symLen; }; struct QueueSymbolInformation { - uint64_t file; uint32_t line; uint64_t symAddr; }; @@ -371,7 +405,6 @@ struct QueueSymbolInformation struct QueueCodeInformation { uint64_t ptr; - uint64_t file; uint32_t line; }; @@ -460,9 +493,9 @@ struct QueueItem QueueStringTransfer stringTransfer; QueueFrameMark frameMark; QueueFrameImage frameImage; - QueueFrameImage frameImageLean; + QueueFrameImageFat frameImageFat; QueueSourceLocation srcloc; - QueueZoneText zoneText; + QueueZoneTextFat zoneTextFat; QueueLockAnnounce lockAnnounce; QueueLockTerminate lockTerminate; 
QueueLockWait lockWait; @@ -470,20 +503,26 @@ struct QueueItem QueueLockRelease lockRelease; QueueLockMark lockMark; QueueLockName lockName; + QueueLockNameFat lockNameFat; QueuePlotData plotData; QueueMessage message; QueueMessageColor messageColor; + QueueMessageLiteral messageLiteral; + QueueMessageColorLiteral messageColorLiteral; + QueueMessageFat messageFat; + QueueMessageColorFat messageColorFat; QueueGpuNewContext gpuNewContext; QueueGpuZoneBegin gpuZoneBegin; QueueGpuZoneEnd gpuZoneEnd; QueueGpuTime gpuTime; + QueueGpuCalibration gpuCalibration; QueueMemAlloc memAlloc; QueueMemFree memFree; - QueueCallstackMemory callstackMemory; - QueueCallstack callstack; - QueueCallstackAlloc callstackAlloc; + QueueMemNamePayload memName; + QueueCallstackFat callstackFat; + QueueCallstackAllocFat callstackAllocFat; QueueCallstackSample callstackSample; - QueueCallstackSampleLean callstackSampleLean; + QueueCallstackSampleFat callstackSampleFat; QueueCallstackFrameSize callstackFrameSize; QueueCallstackFrame callstackFrame; QueueSymbolInformation symbolInformation; @@ -504,27 +543,20 @@ struct QueueItem enum { QueueItemSize = sizeof( QueueItem ) }; static constexpr size_t QueueDataSize[] = { - sizeof( QueueHeader ) + sizeof( QueueZoneText ), - sizeof( QueueHeader ) + sizeof( QueueZoneText ), // zone name + sizeof( QueueHeader ), // zone text + sizeof( QueueHeader ), // zone name sizeof( QueueHeader ) + sizeof( QueueMessage ), sizeof( QueueHeader ) + sizeof( QueueMessageColor ), sizeof( QueueHeader ) + sizeof( QueueMessage ), // callstack sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // callstack sizeof( QueueHeader ) + sizeof( QueueMessage ), // app info - sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // allocated source location, not for network transfer - sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // lean allocated source location - sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // allocated source location, callstack, not for network transfer 
- sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // lean allocated source location, callstack - sizeof( QueueHeader ) + sizeof( QueueCallstackMemory ), // not for network transfer - sizeof( QueueHeader ), // lean callstack memory - sizeof( QueueHeader ) + sizeof( QueueCallstack ), // not for network transfer - sizeof( QueueHeader ), // lean callstack - sizeof( QueueHeader ) + sizeof( QueueCallstackAlloc ), // not for network transfer - sizeof( QueueHeader ), // lean callstack alloc - sizeof( QueueHeader ) + sizeof( QueueCallstackSample ), // not for network transfer - sizeof( QueueHeader ) + sizeof( QueueCallstackSampleLean ), - sizeof( QueueHeader ) + sizeof( QueueFrameImage ), // not for network transfer - sizeof( QueueHeader ) + sizeof( QueueFrameImageLean ), + sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // allocated source location + sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // allocated source location, callstack + sizeof( QueueHeader ), // callstack memory + sizeof( QueueHeader ), // callstack + sizeof( QueueHeader ), // callstack alloc + sizeof( QueueHeader ) + sizeof( QueueCallstackSample ), + sizeof( QueueHeader ) + sizeof( QueueFrameImage ), sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // callstack sizeof( QueueHeader ) + sizeof( QueueZoneEnd ), @@ -536,9 +568,13 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ) + sizeof( QueueLockRelease ), // shared sizeof( QueueHeader ) + sizeof( QueueLockName ), sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), + sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // named sizeof( QueueHeader ) + sizeof( QueueMemFree ), + sizeof( QueueHeader ) + sizeof( QueueMemFree ), // named sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack + sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack, named sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack + sizeof( QueueHeader ) + sizeof( QueueMemFree ), // 
callstack, named sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ), @@ -553,6 +589,7 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ), // terminate sizeof( QueueHeader ), // keep alive sizeof( QueueHeader ) + sizeof( QueueThreadContext ), + sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ), sizeof( QueueHeader ), // crash sizeof( QueueHeader ) + sizeof( QueueCrashReport ), sizeof( QueueHeader ) + sizeof( QueueZoneValidation ), @@ -564,10 +601,10 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ) + sizeof( QueueLockAnnounce ), sizeof( QueueHeader ) + sizeof( QueueLockTerminate ), sizeof( QueueHeader ) + sizeof( QueueLockMark ), - sizeof( QueueHeader ) + sizeof( QueueMessage ), // literal - sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // literal - sizeof( QueueHeader ) + sizeof( QueueMessage ), // literal, callstack - sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // literal, callstack + sizeof( QueueHeader ) + sizeof( QueueMessageLiteral ), + sizeof( QueueHeader ) + sizeof( QueueMessageColorLiteral ), + sizeof( QueueHeader ) + sizeof( QueueMessageLiteral ), // callstack + sizeof( QueueHeader ) + sizeof( QueueMessageColorLiteral ), // callstack sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ), sizeof( QueueHeader ) + sizeof( QueueCallstackFrameSize ), sizeof( QueueHeader ) + sizeof( QueueCallstackFrame ), @@ -579,10 +616,12 @@ static constexpr size_t QueueDataSize[] = { sizeof( QueueHeader ) + sizeof( QueueParamSetup ), sizeof( QueueHeader ), // param pingback sizeof( QueueHeader ) + sizeof( QueueCpuTopology ), + sizeof( QueueHeader ), // single string data + sizeof( QueueHeader ), // second string data + sizeof( QueueHeader ) + sizeof( QueueMemNamePayload ), // keep all QueueStringTransfer below sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // string data sizeof( QueueHeader ) + 
sizeof( QueueStringTransfer ), // thread name - sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // custom string data sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // plot name sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // allocated source location payload sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // callstack payload diff --git a/deps/tracy/common/TracySocket.cpp b/deps/tracy/common/TracySocket.cpp index 65874d4..c919616 100644 --- a/deps/tracy/common/TracySocket.cpp +++ b/deps/tracy/common/TracySocket.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -106,36 +107,39 @@ Socket::~Socket() } } -bool Socket::Connect( const char* addr, int port ) +bool Socket::Connect( const char* addr, uint16_t port ) { assert( !IsValid() ); if( m_ptr ) { const auto c = connect( m_connSock, m_ptr->ai_addr, m_ptr->ai_addrlen ); - assert( c == -1 ); -#if defined _WIN32 || defined __CYGWIN__ - const auto err = WSAGetLastError(); - if( err == WSAEALREADY || err == WSAEINPROGRESS ) return false; - if( err != WSAEISCONN ) + if( c == -1 ) { - freeaddrinfo( m_res ); - closesocket( m_connSock ); - m_ptr = nullptr; - return false; - } +#if defined _WIN32 + const auto err = WSAGetLastError(); + if( err == WSAEALREADY || err == WSAEINPROGRESS ) return false; + if( err != WSAEISCONN ) + { + freeaddrinfo( m_res ); + closesocket( m_connSock ); + m_ptr = nullptr; + return false; + } #else - if( errno == EALREADY || errno == EINPROGRESS ) return false; - if( errno != EISCONN ) - { - freeaddrinfo( m_res ); - close( m_connSock ); - m_ptr = nullptr; - return false; - } + const auto err = errno; + if( err == EALREADY || err == EINPROGRESS ) return false; + if( err != EISCONN ) + { + freeaddrinfo( m_res ); + close( m_connSock ); + m_ptr = nullptr; + return false; + } #endif + } -#if defined _WIN32 || defined __CYGWIN__ +#if defined _WIN32 u_long nonblocking = 0; ioctlsocket( m_connSock, FIONBIO, &nonblocking ); #else @@ -156,7 +160,7 @@ bool 
Socket::Connect( const char* addr, int port ) hints.ai_socktype = SOCK_STREAM; char portbuf[32]; - sprintf( portbuf, "%i", port ); + sprintf( portbuf, "%" PRIu16, port ); if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false; int sock = 0; @@ -167,7 +171,7 @@ bool Socket::Connect( const char* addr, int port ) int val = 1; setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) ); #endif -#if defined _WIN32 || defined __CYGWIN__ +#if defined _WIN32 u_long nonblocking = 1; ioctlsocket( sock, FIONBIO, &nonblocking ); #else @@ -180,7 +184,7 @@ bool Socket::Connect( const char* addr, int port ) } else { -#if defined _WIN32 || defined __CYGWIN__ +#if defined _WIN32 const auto err = WSAGetLastError(); if( err != WSAEWOULDBLOCK ) { @@ -203,7 +207,7 @@ bool Socket::Connect( const char* addr, int port ) freeaddrinfo( res ); if( !ptr ) return false; -#if defined _WIN32 || defined __CYGWIN__ +#if defined _WIN32 u_long nonblocking = 0; ioctlsocket( sock, FIONBIO, &nonblocking ); #else @@ -215,6 +219,48 @@ bool Socket::Connect( const char* addr, int port ) return true; } +bool Socket::ConnectBlocking( const char* addr, uint16_t port ) +{ + assert( !IsValid() ); + assert( !m_ptr ); + + struct addrinfo hints; + struct addrinfo *res, *ptr; + + memset( &hints, 0, sizeof( hints ) ); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + + char portbuf[32]; + sprintf( portbuf, "%" PRIu16, port ); + + if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false; + int sock = 0; + for( ptr = res; ptr; ptr = ptr->ai_next ) + { + if( ( sock = socket( ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol ) ) == -1 ) continue; +#if defined __APPLE__ + int val = 1; + setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) ); +#endif + if( connect( sock, ptr->ai_addr, ptr->ai_addrlen ) == -1 ) + { +#ifdef _WIN32 + closesocket( sock ); +#else + close( sock ); +#endif + continue; + } + break; + } + freeaddrinfo( res ); + if( !ptr ) return false; + + 
m_sock.store( sock, std::memory_order_relaxed ); + return true; +} + void Socket::Close() { const auto sock = m_sock.load( std::memory_order_relaxed ); @@ -247,7 +293,7 @@ int Socket::GetSendBufSize() { const auto sock = m_sock.load( std::memory_order_relaxed ); int bufSize; -#if defined _WIN32 || defined __CYGWIN__ +#if defined _WIN32 int sz = sizeof( bufSize ); getsockopt( sock, SOL_SOCKET, SO_SNDBUF, (char*)&bufSize, &sz ); #else @@ -306,6 +352,24 @@ int Socket::Recv( void* _buf, int len, int timeout ) } } +int Socket::ReadUpTo( void* _buf, int len, int timeout ) +{ + const auto sock = m_sock.load( std::memory_order_relaxed ); + auto buf = (char*)_buf; + + int rd = 0; + while( len > 0 ) + { + const auto res = recv( sock, buf, len, 0 ); + if( res == 0 ) break; + if( res == -1 ) return -1; + len -= res; + rd += res; + buf += res; + } + return rd; +} + bool Socket::Read( void* buf, int len, int timeout ) { auto cbuf = (char*)buf; @@ -383,33 +447,45 @@ ListenSocket::~ListenSocket() if( m_sock != -1 ) Close(); } -bool ListenSocket::Listen( int port, int backlog ) +static int addrinfo_and_socket_for_family( uint16_t port, int ai_family, struct addrinfo** res ) +{ + struct addrinfo hints; + memset( &hints, 0, sizeof( hints ) ); + hints.ai_family = ai_family; + hints.ai_socktype = SOCK_STREAM; +#ifndef TRACY_ONLY_LOCALHOST + const char* onlyLocalhost = getenv( "TRACY_ONLY_LOCALHOST" ); + if( !onlyLocalhost || onlyLocalhost[0] != '1' ) + { + hints.ai_flags = AI_PASSIVE; + } +#endif + char portbuf[32]; + sprintf( portbuf, "%" PRIu16, port ); + if( getaddrinfo( nullptr, portbuf, &hints, res ) != 0 ) return -1; + int sock = socket( (*res)->ai_family, (*res)->ai_socktype, (*res)->ai_protocol ); + if (sock == -1) freeaddrinfo( *res ); + return sock; +} + +bool ListenSocket::Listen( uint16_t port, int backlog ) { assert( m_sock == -1 ); - struct addrinfo* res; - struct addrinfo hints; + struct addrinfo* res = nullptr; - memset( &hints, 0, sizeof( hints ) ); - hints.ai_family = 
AF_INET6; - hints.ai_socktype = SOCK_STREAM; -#ifndef TRACY_ONLY_LOCALHOST - hints.ai_flags = AI_PASSIVE; +#ifndef TRACY_ONLY_IPV4 + const char* onlyIPv4 = getenv( "TRACY_ONLY_IPV4" ); + if( !onlyIPv4 || onlyIPv4[0] != '1' ) + { + m_sock = addrinfo_and_socket_for_family( port, AF_INET6, &res ); + } #endif - - char portbuf[32]; - sprintf( portbuf, "%i", port ); - - if( getaddrinfo( nullptr, portbuf, &hints, &res ) != 0 ) return false; - - m_sock = socket( res->ai_family, res->ai_socktype, res->ai_protocol ); if (m_sock == -1) { // IPV6 protocol may not be available/is disabled. Try to create a socket // with the IPV4 protocol - hints.ai_family = AF_INET; - if( getaddrinfo( nullptr, portbuf, &hints, &res ) != 0 ) return false; - m_sock = socket( res->ai_family, res->ai_socktype, res->ai_protocol ); + m_sock = addrinfo_and_socket_for_family( port, AF_INET, &res ); if( m_sock == -1 ) return false; } #if defined _WIN32 || defined __CYGWIN__ @@ -483,7 +559,7 @@ UdpBroadcast::~UdpBroadcast() if( m_sock != -1 ) Close(); } -bool UdpBroadcast::Open( const char* addr, int port ) +bool UdpBroadcast::Open( const char* addr, uint16_t port ) { assert( m_sock == -1 ); @@ -495,7 +571,7 @@ bool UdpBroadcast::Open( const char* addr, int port ) hints.ai_socktype = SOCK_DGRAM; char portbuf[32]; - sprintf( portbuf, "%i", port ); + sprintf( portbuf, "%" PRIu16, port ); if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false; int sock = 0; @@ -506,7 +582,7 @@ bool UdpBroadcast::Open( const char* addr, int port ) int val = 1; setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) ); #endif -#if defined _WIN32 || defined __CYGWIN__ +#if defined _WIN32 unsigned long broadcast = 1; if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, (const char*)&broadcast, sizeof( broadcast ) ) == -1 ) #else @@ -541,7 +617,7 @@ void UdpBroadcast::Close() m_sock = -1; } -int UdpBroadcast::Send( int port, const void* data, int len ) +int UdpBroadcast::Send( uint16_t port, const void* data, int 
len ) { assert( m_sock != -1 ); struct sockaddr_in addr; @@ -563,8 +639,10 @@ IpAddress::~IpAddress() void IpAddress::Set( const struct sockaddr& addr ) { -#if __MINGW32__ - auto ai = (struct sockaddr_in*)&addr; +#if defined _WIN32 && ( !defined NTDDI_WIN10 || NTDDI_VERSION < NTDDI_WIN10 ) + struct sockaddr_in tmp; + memcpy( &tmp, &addr, sizeof( tmp ) ); + auto ai = &tmp; #else auto ai = (const struct sockaddr_in*)&addr; #endif @@ -585,7 +663,7 @@ UdpListen::~UdpListen() if( m_sock != -1 ) Close(); } -bool UdpListen::Listen( int port ) +bool UdpListen::Listen( uint16_t port ) { assert( m_sock == -1 ); @@ -596,14 +674,14 @@ bool UdpListen::Listen( int port ) int val = 1; setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) ); #endif -#if defined _WIN32 || defined __CYGWIN__ +#if defined _WIN32 unsigned long reuse = 1; setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, (const char*)&reuse, sizeof( reuse ) ); #else int reuse = 1; setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof( reuse ) ); #endif -#if defined _WIN32 || defined __CYGWIN__ +#if defined _WIN32 unsigned long broadcast = 1; if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, (const char*)&broadcast, sizeof( broadcast ) ) == -1 ) #else @@ -649,14 +727,14 @@ void UdpListen::Close() m_sock = -1; } -const char* UdpListen::Read( size_t& len, IpAddress& addr ) +const char* UdpListen::Read( size_t& len, IpAddress& addr, int timeout ) { static char buf[2048]; struct pollfd fd; fd.fd = (socket_t)m_sock; fd.events = POLLIN; - if( poll( &fd, 1, 10 ) <= 0 ) return nullptr; + if( poll( &fd, 1, timeout ) <= 0 ) return nullptr; sockaddr sa; socklen_t salen = sizeof( struct sockaddr ); diff --git a/deps/tracy/common/TracySocket.hpp b/deps/tracy/common/TracySocket.hpp index c84f7c4..e91f8ea 100644 --- a/deps/tracy/common/TracySocket.hpp +++ b/deps/tracy/common/TracySocket.hpp @@ -23,12 +23,14 @@ public: Socket( int sock ); ~Socket(); - bool Connect( const char* addr, int port ); + bool Connect( const char* 
addr, uint16_t port ); + bool ConnectBlocking( const char* addr, uint16_t port ); void Close(); int Send( const void* buf, int len ); int GetSendBufSize(); + int ReadUpTo( void* buf, int len, int timeout ); bool Read( void* buf, int len, int timeout ); template @@ -74,7 +76,7 @@ public: ListenSocket(); ~ListenSocket(); - bool Listen( int port, int backlog ); + bool Listen( uint16_t port, int backlog ); Socket* Accept(); void Close(); @@ -93,10 +95,10 @@ public: UdpBroadcast(); ~UdpBroadcast(); - bool Open( const char* addr, int port ); + bool Open( const char* addr, uint16_t port ); void Close(); - int Send( int port, const void* data, int len ); + int Send( uint16_t port, const void* data, int len ); UdpBroadcast( const UdpBroadcast& ) = delete; UdpBroadcast( UdpBroadcast&& ) = delete; @@ -134,10 +136,10 @@ public: UdpListen(); ~UdpListen(); - bool Listen( int port ); + bool Listen( uint16_t port ); void Close(); - const char* Read( size_t& len, IpAddress& addr ); + const char* Read( size_t& len, IpAddress& addr, int timeout ); UdpListen( const UdpListen& ) = delete; UdpListen( UdpListen&& ) = delete; diff --git a/deps/tracy/common/TracySystem.cpp b/deps/tracy/common/TracySystem.cpp index 25ccf9f..18b39da 100644 --- a/deps/tracy/common/TracySystem.cpp +++ b/deps/tracy/common/TracySystem.cpp @@ -237,3 +237,13 @@ TRACY_API const char* GetThreadName( uint64_t id ) } } + +#ifdef __cplusplus +extern "C" { +#endif + +TRACY_API void ___tracy_set_thread_name( const char* name ) { tracy::SetThreadName( name ); } + +#ifdef __cplusplus +} +#endif diff --git a/src/client/contouring/FlatDualMC.cpp b/src/client/contouring/FlatDualMC.cpp index b519e15..83d6f5c 100644 --- a/src/client/contouring/FlatDualMC.cpp +++ b/src/client/contouring/FlatDualMC.cpp @@ -288,11 +288,10 @@ namespace contouring { for (size_t i = 0; i < surrounding.size(); i++) { auto &edits = surrounding[i]->getEdits(); auto offset = glm::ivec3(surrounding::g_corner_offsets[i]) * CHUNK_LENGTH; - for (auto it = 
edits.end(); it != edits.begin();) { - it--; - auto p = offset + glm::ivec3(glm::fromIdx(it->idx)); + for (auto it = edits.begin(); it != edits.end(); ++it) { + auto p = offset + glm::ivec3(glm::fromIdx(it->first)); if(p.x < SIZE && p.y < SIZE && p.z < SIZE) { - setCell(p.x, p.y, p.z, it->value); + setCell(p.x, p.y, p.z, it->second.value); } } } diff --git a/src/client/render/vk/CommandCenter.hpp b/src/client/render/vk/CommandCenter.hpp index efb22a0..1c23c79 100644 --- a/src/client/render/vk/CommandCenter.hpp +++ b/src/client/render/vk/CommandCenter.hpp @@ -11,7 +11,7 @@ namespace tracy { class VkCtx; } -typedef tracy::VkCtx* TracyVkCtx; +typedef tracy::VkCtx* TracyVkCtxPtr; namespace render::vk { class SwapChain; @@ -54,7 +54,7 @@ private: VkQueue graphicsQueue; VkCommandPool graphicsPool; std::vector graphicsBuffers; - TracyVkCtx tracyCtx; + TracyVkCtxPtr tracyCtx; BufferGroup uniformBuffers; diff --git a/src/client/world/Chunk.hpp b/src/client/world/Chunk.hpp index a847fc2..9139608 100644 --- a/src/client/world/Chunk.hpp +++ b/src/client/world/Chunk.hpp @@ -16,7 +16,7 @@ public: it->second -= deltaTime; if (it->second <= 0 && animate) { invalidate(it->first.idx); - edits.emplace_back(it->first); + edits.emplace(it->first.idx, it->first); it = futureEdits.erase(it); } else { it++; diff --git a/src/client/world/DistantUniverse.cpp b/src/client/world/DistantUniverse.cpp index d838de6..e38b958 100644 --- a/src/client/world/DistantUniverse.cpp +++ b/src/client/world/DistantUniverse.cpp @@ -9,6 +9,7 @@ #include "../../core/net/io.hpp" #include "../../core/utils/logger.hpp" #include "Chunk.hpp" +#include using namespace world::client; @@ -29,6 +30,8 @@ void DistantUniverse::update(voxel_pos pos, float deltaTime) { { // Update alive areas ZoneScopedN("World"); + auto rng = std::mt19937(std::rand()); + const auto contouringThreshold = rng.max() / (1 + contouring->getQueueSize()); for (auto& area: areas) { ZoneScopedN("Area"); const bool chunkChangeArea = (false && 
area.second->move(glm::vec3(deltaTime))) || chunkChange; // TODO: area.velocity @@ -41,7 +44,7 @@ void DistantUniverse::update(voxel_pos pos, float deltaTime) { if (glm::length2(diff - it_c->first) > glm::pow2(options.keepDistance)) { it_c = chunks.erase(it_c); } else { - if(const auto neighbors = std::dynamic_pointer_cast(it_c->second)->update(deltaTime, true /*MAYBE: random update*/)) { + if(const auto neighbors = std::dynamic_pointer_cast(it_c->second)->update(deltaTime, rng() < contouringThreshold)) { contouring->onUpdate(std::make_pair(area.first, it_c->first), diff, chunks, neighbors.value()); } ++it_c; @@ -290,24 +293,9 @@ bool DistantUniverse::onPacket(const data::out_view& buf, net::PacketFlags) { if (!fill) break; - if(const auto it = areas.find(fill->pos.first); it != areas.end()) { - auto &chunks = it->second->setChunks(); - auto iterator = world::iterator::Get(fill->shape, fill->radius); - world::iterator::pair point; - while (iterator->next(point)) { - const voxel_pos offset = point.first; - const auto split = glm::splitIdx(fill->pos.second + offset); - if(chunks.inRange(split.first)) { - if(const auto chunk = it->second->setChunks().findInRange(split.first)) { - auto ck = std::dynamic_pointer_cast(chunk.value()); - auto prev = ck->get(split.second); - const auto next = prev.filled(fill->val, point.second); - const auto delay = glm::length2(offset) / fill->radius * .05f; - ck->apply(Chunk::Edit{split.second, next, delay}); - } - } - } - } + world::iterator::Apply(areas, *fill, [](std::shared_ptr &ck, chunk_pos, chunk_voxel_idx idx, Voxel, Voxel next, float delay) { + ck->apply(Chunk::Edit{next, delay, idx}); + }); break; } @@ -395,27 +383,10 @@ void DistantUniverse::emit(const action::packet &action) { peer.send(net::PacketWriter::Of(net::client_packet_type::FILL_SHAPE, *fill)); if (options.editPrediction) { ZoneScopedN("Fill"); - const auto keepDelay = 10 + (peer.getRTT() / 20000.f); // 10s + 50RTT - if(const auto it = areas.find(fill->pos.first); it 
!= areas.end()) { - auto &chunks = it->second->setChunks(); - auto iterator = world::iterator::Get(fill->shape, fill->radius); - world::iterator::pair point; - while (iterator->next(point)) { - const voxel_pos offset = point.first; - const auto split = glm::splitIdx(fill->pos.second + offset); - if(chunks.inRange(split.first)) { - if(const auto chunk = it->second->setChunks().findInRange(split.first)) { - auto ck = std::dynamic_pointer_cast(chunk.value()); - auto prev = ck->get(split.second); - const auto next = prev.filled(fill->val, point.second); - if(prev.value != next.value) { - const auto delay = glm::length2(offset) / fill->radius * .05f; - ck->addFutureEdit(Chunk::Edit{split.second, next, keepDelay - delay * 2}, delay); - } - } - } - } - } + const auto keepDelay = 5 + (peer.getRTT() / 20000.f); // 5s + 50RTT + world::iterator::Apply(areas, *fill, [&](std::shared_ptr &ck, chunk_pos, chunk_voxel_idx idx, Voxel, Voxel next, float delay) { + ck->addFutureEdit(Chunk::Edit{next, keepDelay - delay * 2, idx}, delay); + }); } } else { LOG_W("Bad action " << action.index()); diff --git a/src/core/net/io.hpp b/src/core/net/io.hpp index b9ed5b3..8e7323b 100644 --- a/src/core/net/io.hpp +++ b/src/core/net/io.hpp @@ -43,7 +43,9 @@ public: void* data() { return buffer.writeTo(0); } void reserve(size_t target) { if (target >= buffer.siz - buffer.cur) { - buffer.ptr = (uint8_t*)realloc(buffer.ptr, target + buffer.cur); + const auto size = target + buffer.cur; + buffer.ptr = (uint8_t *)realloc(buffer.ptr, size); + buffer.siz = size; } } void resize(size_t target) { diff --git a/src/core/world/Chunk.hpp b/src/core/world/Chunk.hpp index 643f9ad..0ee2d49 100644 --- a/src/core/world/Chunk.hpp +++ b/src/core/world/Chunk.hpp @@ -13,11 +13,13 @@ namespace world { Chunk(std::istream& str, bool rle = RLE); virtual ~Chunk(); - struct Edit { - chunk_voxel_idx idx; + struct EditBody { Voxel value; float delay; }; + struct Edit: EditBody { + chunk_voxel_idx idx; + }; /// Get voxel from 
index inline const Voxel& get(chunk_voxel_idx idx) const { diff --git a/src/core/world/EdittableChunk.cpp b/src/core/world/EdittableChunk.cpp index 8b68d60..ea01ed1 100644 --- a/src/core/world/EdittableChunk.cpp +++ b/src/core/world/EdittableChunk.cpp @@ -12,9 +12,9 @@ EdittableChunk::~EdittableChunk() { } std::optional EdittableChunk::update(float deltaTime, bool animate) { ZoneScopedN("Chunk"); for(auto it = edits.begin(); it != edits.end();) { - it->delay -= deltaTime; - if(it->delay <= 0 && animate) { - invalidate(it->idx); + it->second.delay -= deltaTime; + if(it->second.delay <= 0 && animate) { + invalidate(it->first); it = edits.erase(it); } else { it++; @@ -42,8 +42,9 @@ void EdittableChunk::apply(const Edit& edit) { const auto prev = voxels[edit.idx]; if(prev.value != edit.value.value) { voxels[edit.idx] = edit.value; + edits.erase(edit.idx); if(edit.delay > 0) { - edits.emplace_back({edit.idx, prev, edit.delay}); + edits.emplace(edit.idx, EditBody{prev, edit.delay}); } else { invalidate(edit.idx); } diff --git a/src/core/world/EdittableChunk.hpp b/src/core/world/EdittableChunk.hpp index 5b5125c..c45bd43 100644 --- a/src/core/world/EdittableChunk.hpp +++ b/src/core/world/EdittableChunk.hpp @@ -22,8 +22,9 @@ namespace world::client { void apply(const Chunk::Edit &edit); + using edits_t = robin_hood::unordered_map; /// Get pending changes - const std::vector &getEdits() const { return edits; } + const edits_t &getEdits() const { return edits; } static std::optional getNeighborIdx(chunk_voxel_idx idx, Face dir); @@ -31,8 +32,7 @@ namespace world::client { EdittableChunk(); /// Animated changes - /// MAYBE: sort by delay - std::vector edits; + edits_t edits; /// Require update bool upToDate = true; /// Neighbors to update diff --git a/src/core/world/iterators.hpp b/src/core/world/iterators.hpp index fc2f42f..918f48d 100644 --- a/src/core/world/iterators.hpp +++ b/src/core/world/iterators.hpp @@ -18,6 +18,34 @@ protected: /// From -radius to radius 
std::unique_ptr Get(action::Shape, uint16_t radius); +template +void Apply(area_map &areas, action::FillShape fill, const CB& callback) { + if(const auto it = areas.find(fill.pos.first); it != areas.end()) { + auto &chunks = it->second->setChunks(); + auto iterator = Get(fill.shape, fill.radius); + pair point; + typename std::shared_ptr ck = nullptr; + chunk_pos ck_pos = chunk_pos(INT32_MAX); + while (iterator->next(point)) { + const voxel_pos offset = point.first; + const auto split = glm::splitIdx(fill.pos.second + offset); + if (split.first != ck_pos && chunks.inRange(split.first)) { + if(auto it = chunks.find(split.first); it != chunks.end()) { + ck = std::dynamic_pointer_cast(it->second); + ck_pos = split.first; + } + } + if (split.first == ck_pos) { + auto prev = ck->get(split.second); + const auto next = prev.filled(fill.val, point.second); + if (prev.value != next.value) { + callback(ck, ck_pos, split.second, prev, next, glm::length2(offset) / fill.radius * .05f); + } + } + } + } +} + class Cube final: public Abstract { public: bool next(pair&) override; diff --git a/src/server/net/Server.hpp b/src/server/net/Server.hpp index e67c10d..0af62a3 100644 --- a/src/server/net/Server.hpp +++ b/src/server/net/Server.hpp @@ -58,6 +58,14 @@ public: call(&peer); } } + template + bool anyPeer(P predicate) { + for(auto& peer: peers) { + if(predicate(&peer)) + return true; + } + return false; + } private: std::forward_list peers; diff --git a/src/server/world/SharedParts.hpp b/src/server/world/SharedParts.hpp index 2e2a276..e2999b1 100644 --- a/src/server/world/SharedParts.hpp +++ b/src/server/world/SharedParts.hpp @@ -16,8 +16,9 @@ public: std::optional replace(chunk_voxel_idx idx, const Voxel &val, float delay = 0) override { const auto res = voxels[idx]; set(idx, val); + edits.erase(idx); if(delay > 0) { - edits.emplace_back({idx, res, delay}); + edits.emplace(idx, EditBody{res, delay}); } else { invalidate(idx); } diff --git a/src/server/world/Universe.cpp 
b/src/server/world/Universe.cpp index 26a893a..fde1011 100644 --- a/src/server/world/Universe.cpp +++ b/src/server/world/Universe.cpp @@ -231,7 +231,7 @@ void Universe::pull() { if (data == nullptr) return; - if (data->pendingEdits.empty() && peer->queueSize(net::server::queue::EDIT) == 0) { + if (!data->pendingEdits.empty() && peer->queueSize(net::server::queue::EDIT) == 0) { peer->send(net::PacketWriter::Of(net::server_packet_type::EDITS, data->pendingEdits.front())); data->pendingEdits.pop(); } @@ -620,8 +620,9 @@ bool Universe::onPacket(net::server::Peer *peer, const data::out_view &buf, net: if (!packet.read(cpos)) break; const auto dist = glm::length2(areaOffset - cpos); - if (dist <= glm::pow2(loadDistance) && chunks.inRange(cpos) && chunks.findInRange(cpos).has_value()) { - data->pushChunk(std::make_pair(id, cpos), dist); + if (dist <= glm::pow2(loadDistance) && chunks.inRange(cpos)) { + if (chunks.findInRange(cpos).has_value()) + data->pushChunk(std::make_pair(id, cpos), dist); } else { LOG_T("Request out of range chunk"); } @@ -692,59 +693,41 @@ bool Universe::isAreaFree(const area_ &pos, const geometry::Shape sha world::ItemList Universe::set(const area_& pos, int radius, action::Shape shape, const Voxel& val) { ZoneScopedN("Fill"); ItemList list; - if(const auto it = areas.find(pos.first); it != areas.end()) { - robin_hood::unordered_map> edits; - auto &chunks = it->second->setChunks(); - auto iterator = world::iterator::Get(shape, radius); - world::iterator::pair point; - while (iterator->next(point)) { - const voxel_pos offset = point.first; - const auto split = glm::splitIdx(pos.second + offset); - if(chunks.inRange(split.first)) { - if(const auto chunk = it->second->setChunks().findInRange(split.first)) { - auto ck = std::dynamic_pointer_cast(chunk.value()); - auto prev = ck->get(split.second); - const auto next = prev.filled(val, point.second); - if(prev.value != next.value) { - //TODO: apply break table - //TODO: inventory - const auto delay = 
glm::length2(offset) / radius * .05f; - edits[split.first].push_back(Chunk::Edit{split.second, next, delay}); - ck->replace(split.second, next, delay); - } - } - } + const bool stupidClient = host.anyPeer([&](net::server::Peer *peer) { + auto data = peer->getCtx(); + return data && !data->handleEdits; + }); + robin_hood::unordered_map> edits; + world::iterator::Apply(areas, world::action::FillShape(pos, val, shape, radius), + [&](std::shared_ptr& ck, chunk_pos ck_pos, chunk_voxel_idx idx, Voxel /*prev*/, Voxel next, float delay) { + if (stupidClient) + edits[ck_pos].push_back(Chunk::Edit{next, delay, idx}); + //TODO: apply break table + //TODO: inventory + ck->replace(idx, next, delay); + }); + if (stupidClient && !edits.empty()) { + ZoneScopedN("Packet"); + size_t size = sizeof(area_id); + for(const auto& part: edits) { + size += sizeof(chunk_pos); + size += sizeof(chunk_voxel_idx); + size += sizeof(Chunk::Edit) * part.second.size(); } - - bool stupidClient = false; + auto packet = net::PacketWriter(net::server_packet_type::EDITS, size); + packet.write(pos.first); + for(const auto& part: edits) { + packet.write(part.first); + packet.write(part.second.size()); + packet.write(part.second.data(), part.second.size() * sizeof(Chunk::Edit)); + } + auto buffer = packet.finish(); host.iterPeers([&](net::server::Peer *peer) { + //MAYBE: only in range auto data = peer->getCtx(); if (data && !data->handleEdits) - stupidClient = true; + peer->send(buffer, net::server::queue::CHUNK); }); - if (stupidClient) { - ZoneScopedN("Packet"); - size_t size = sizeof(area_id); - for(const auto& part: edits) { - size += sizeof(chunk_pos); - size += sizeof(chunk_voxel_idx); - size += sizeof(Chunk::Edit) * part.second.size(); - } - auto packet = net::PacketWriter(net::server_packet_type::EDITS, size); - packet.write(pos.first); - for(const auto& part: edits) { - packet.write(part.first); - packet.write(part.second.size()); - packet.write(part.second.data(), part.second.size() * 
sizeof(Chunk::Edit)); - } - auto buffer = packet.finish(); - host.iterPeers([&](net::server::Peer *peer) { - //MAYBE: only in range - auto data = peer->getCtx(); - if (data && !data->handleEdits) - peer->send(buffer, net::server::queue::CHUNK); - }); - } } return list; }