1
0
Fork 0

Update tracy to 0.7.3

This commit is contained in:
May B. 2020-11-07 22:31:17 +01:00
parent fa482b37f3
commit d7012d1554
36 changed files with 1728 additions and 668 deletions

3
deps/tracy/AUTHORS vendored
View File

@ -9,3 +9,6 @@ Dedmen Miller <dedmen@dedmen.de> (find zone bug fixes, improv
Michał Cichoń <michcic@gmail.com> (OSX call stack decoding backport) Michał Cichoń <michcic@gmail.com> (OSX call stack decoding backport)
Thales Sabino <thales@codeplay.com> (OpenCL support) Thales Sabino <thales@codeplay.com> (OpenCL support)
Andrew Depke <andrewdepke@gmail.com> (Direct3D 12 support) Andrew Depke <andrewdepke@gmail.com> (Direct3D 12 support)
Simonas Kazlauskas <git@kazlauskas.me> (OSX CI, external bindings)
Jakub Žádník <kubouch@gmail.com> (csvexport utility)
Andrey Voroshilov <andrew.voroshilov@gmail.com> (multi-DLL fixes)

110
deps/tracy/Tracy.hpp vendored
View File

@ -11,6 +11,9 @@
#define ZoneNamedC(x,y,z) #define ZoneNamedC(x,y,z)
#define ZoneNamedNC(x,y,z,w) #define ZoneNamedNC(x,y,z,w)
#define ZoneTransient(x,y)
#define ZoneTransientN(x,y,z)
#define ZoneScoped #define ZoneScoped
#define ZoneScopedN(x) #define ZoneScopedN(x)
#define ZoneScopedC(x) #define ZoneScopedC(x)
@ -50,12 +53,22 @@
#define TracyAlloc(x,y) #define TracyAlloc(x,y)
#define TracyFree(x) #define TracyFree(x)
#define TracySecureAlloc(x,y)
#define TracySecureFree(x)
#define TracyAllocN(x,y,z)
#define TracyFreeN(x,y)
#define TracySecureAllocN(x,y,z)
#define TracySecureFreeN(x,y)
#define ZoneNamedS(x,y,z) #define ZoneNamedS(x,y,z)
#define ZoneNamedNS(x,y,z,w) #define ZoneNamedNS(x,y,z,w)
#define ZoneNamedCS(x,y,z,w) #define ZoneNamedCS(x,y,z,w)
#define ZoneNamedNCS(x,y,z,w,a) #define ZoneNamedNCS(x,y,z,w,a)
#define ZoneTransientS(x,y,z)
#define ZoneTransientNS(x,y,z,w)
#define ZoneScopedS(x) #define ZoneScopedS(x)
#define ZoneScopedNS(x,y) #define ZoneScopedNS(x,y)
#define ZoneScopedCS(x,y) #define ZoneScopedCS(x,y)
@ -63,6 +76,13 @@
#define TracyAllocS(x,y,z) #define TracyAllocS(x,y,z)
#define TracyFreeS(x,y) #define TracyFreeS(x,y)
#define TracySecureAllocS(x,y,z)
#define TracySecureFreeS(x,y)
#define TracyAllocNS(x,y,z,w)
#define TracyFreeNS(x,y,z)
#define TracySecureAllocNS(x,y,z,w)
#define TracySecureFreeNS(x,y,z)
#define TracyMessageS(x,y,z) #define TracyMessageS(x,y,z)
#define TracyMessageLS(x,y) #define TracyMessageLS(x,y)
@ -71,23 +91,32 @@
#define TracyParameterRegister(x) #define TracyParameterRegister(x)
#define TracyParameterSetup(x,y,z,w) #define TracyParameterSetup(x,y,z,w)
#define TracyIsConnected false
#else #else
#include <string.h>
#include "client/TracyLock.hpp" #include "client/TracyLock.hpp"
#include "client/TracyProfiler.hpp" #include "client/TracyProfiler.hpp"
#include "client/TracyScoped.hpp" #include "client/TracyScoped.hpp"
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK #if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define ZoneNamed( varname, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active ); # define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define ZoneNamedN( varname, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active ); # define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define ZoneNamedC( varname, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active ); # define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define ZoneNamedNC( varname, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active ); # define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define ZoneTransient( varname, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, TRACY_CALLSTACK, active );
# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), TRACY_CALLSTACK, active );
#else #else
# define ZoneNamed( varname, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active ); # define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
# define ZoneNamedN( varname, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active ); # define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
# define ZoneNamedC( varname, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active ); # define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
# define ZoneNamedNC( varname, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active ); # define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
# define ZoneTransient( varname, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, active );
# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), active );
#endif #endif
#define ZoneScoped ZoneNamed( ___tracy_scoped_zone, true ) #define ZoneScoped ZoneNamed( ___tracy_scoped_zone, true )
@ -109,13 +138,13 @@
#define FrameImage( image, width, height, offset, flip ) tracy::Profiler::SendFrameImage( image, width, height, offset, flip ); #define FrameImage( image, width, height, offset, flip ) tracy::Profiler::SendFrameImage( image, width, height, offset, flip );
#define TracyLockable( type, varname ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() }; #define TracyLockable( type, varname ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define TracyLockableN( type, varname, desc ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() }; #define TracyLockableN( type, varname, desc ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define TracySharedLockable( type, varname ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() }; #define TracySharedLockable( type, varname ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() }; #define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define LockableBase( type ) tracy::Lockable<type> #define LockableBase( type ) tracy::Lockable<type>
#define SharedLockableBase( type ) tracy::SharedLockable<type> #define SharedLockableBase( type ) tracy::SharedLockable<type>
#define LockMark( varname ) static const tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; varname.Mark( &__tracy_lock_location_##varname ); #define LockMark( varname ) static constexpr tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; varname.Mark( &__tracy_lock_location_##varname );
#define LockableName( varname, txt, size ) varname.CustomName( txt, size ); #define LockableName( varname, txt, size ) varname.CustomName( txt, size );
#define TracyPlot( name, val ) tracy::Profiler::PlotData( name, val ); #define TracyPlot( name, val ) tracy::Profiler::PlotData( name, val );
@ -129,31 +158,55 @@
# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, TRACY_CALLSTACK ); # define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, TRACY_CALLSTACK );
# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK ); # define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK );
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK ); # define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, false );
# define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK ); # define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, false );
# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, true );
# define TracySecureFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, true );
# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, false, name );
# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, false, name );
# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, true, name );
# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, true, name );
#else #else
# define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, 0 ); # define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, 0 );
# define TracyMessageL( txt ) tracy::Profiler::Message( txt, 0 ); # define TracyMessageL( txt ) tracy::Profiler::Message( txt, 0 );
# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, 0 ); # define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, 0 );
# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, 0 ); # define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, 0 );
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size ); # define TracyAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, false );
# define TracyFree( ptr ) tracy::Profiler::MemFree( ptr ); # define TracyFree( ptr ) tracy::Profiler::MemFree( ptr, false );
# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, true );
# define TracySecureFree( ptr ) tracy::Profiler::MemFree( ptr, true );
# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, false, name );
# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, false, name );
# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, true, name );
# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, true, name );
#endif #endif
#ifdef TRACY_HAS_CALLSTACK #ifdef TRACY_HAS_CALLSTACK
# define ZoneNamedS( varname, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active ); # define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneNamedNS( varname, name, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active ); # define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneNamedCS( varname, color, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active ); # define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneNamedNCS( varname, name, color, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active ); # define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, depth, active );
# define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), depth, active );
# define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true ) # define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true )
# define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true ) # define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true )
# define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true ) # define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true )
# define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color, depth, true ) # define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color, depth, true )
# define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth ); # define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, false );
# define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth ); # define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, false );
# define TracySecureAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, true );
# define TracySecureFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, true );
# define TracyAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, false, name );
# define TracyFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, false, name );
# define TracySecureAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, true, name );
# define TracySecureFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, true, name );
# define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth ); # define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth );
# define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth ); # define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth );
@ -165,6 +218,9 @@
# define ZoneNamedCS( varname, color, depth, active ) ZoneNamedC( varname, color, active ) # define ZoneNamedCS( varname, color, depth, active ) ZoneNamedC( varname, color, active )
# define ZoneNamedNCS( varname, name, color, depth, active ) ZoneNamedNC( varname, name, color, active ) # define ZoneNamedNCS( varname, name, color, depth, active ) ZoneNamedNC( varname, name, color, active )
# define ZoneTransientS( varname, depth, active ) ZoneTransient( varname, active )
# define ZoneTransientNS( varname, name, depth, active ) ZoneTransientN( varname, name, active )
# define ZoneScopedS( depth ) ZoneScoped # define ZoneScopedS( depth ) ZoneScoped
# define ZoneScopedNS( name, depth ) ZoneScopedN( name ) # define ZoneScopedNS( name, depth ) ZoneScopedN( name )
# define ZoneScopedCS( color, depth ) ZoneScopedC( color ) # define ZoneScopedCS( color, depth ) ZoneScopedC( color )
@ -172,6 +228,13 @@
# define TracyAllocS( ptr, size, depth ) TracyAlloc( ptr, size ) # define TracyAllocS( ptr, size, depth ) TracyAlloc( ptr, size )
# define TracyFreeS( ptr, depth ) TracyFree( ptr ) # define TracyFreeS( ptr, depth ) TracyFree( ptr )
# define TracySecureAllocS( ptr, size, depth ) TracySecureAlloc( ptr, size )
# define TracySecureFreeS( ptr, depth ) TracySecureFree( ptr )
// Fallback forms of the named, depth-taking memory macros: the depth argument
// is ignored and the call is forwarded to the named (N) variant. They must not
// forward to TracyAlloc/TracyFree/TracySecureAlloc/TracySecureFree, because
// those macros take no name parameter and the expansion would not preprocess.
# define TracyAllocNS( ptr, size, depth, name ) TracyAllocN( ptr, size, name )
# define TracyFreeNS( ptr, depth, name ) TracyFreeN( ptr, name )
# define TracySecureAllocNS( ptr, size, depth, name ) TracySecureAllocN( ptr, size, name )
# define TracySecureFreeNS( ptr, depth, name ) TracySecureFreeN( ptr, name )
# define TracyMessageS( txt, size, depth ) TracyMessage( txt, size ) # define TracyMessageS( txt, size, depth ) TracyMessage( txt, size )
# define TracyMessageLS( txt, depth ) TracyMessageL( txt ) # define TracyMessageLS( txt, depth ) TracyMessageL( txt )
@ -181,6 +244,7 @@
#define TracyParameterRegister( cb ) tracy::Profiler::ParameterRegister( cb ); #define TracyParameterRegister( cb ) tracy::Profiler::ParameterRegister( cb );
#define TracyParameterSetup( idx, name, isBool, val ) tracy::Profiler::ParameterSetup( idx, name, isBool, val ); #define TracyParameterSetup( idx, name, isBool, val ) tracy::Profiler::ParameterSetup( idx, name, isBool, val );
#define TracyIsConnected tracy::GetProfiler().IsConnected()
#endif #endif

42
deps/tracy/TracyC.h vendored
View File

@ -11,6 +11,11 @@
extern "C" { extern "C" {
#endif #endif
TRACY_API void ___tracy_set_thread_name( const char* name );
#define TracyCSetThreadName( name ) ___tracy_set_thread_name( name );
#ifndef TRACY_ENABLE #ifndef TRACY_ENABLE
typedef const void* TracyCZoneCtx; typedef const void* TracyCZoneCtx;
@ -26,6 +31,8 @@ typedef const void* TracyCZoneCtx;
#define TracyCAlloc(x,y) #define TracyCAlloc(x,y)
#define TracyCFree(x) #define TracyCFree(x)
#define TracyCSecureAlloc(x,y)
#define TracyCSecureFree(x)
#define TracyCFrameMark #define TracyCFrameMark
#define TracyCFrameMarkNamed(x) #define TracyCFrameMarkNamed(x)
@ -47,6 +54,8 @@ typedef const void* TracyCZoneCtx;
#define TracyCAllocS(x,y,z) #define TracyCAllocS(x,y,z)
#define TracyCFreeS(x,y) #define TracyCFreeS(x,y)
#define TracyCSecureAllocS(x,y,z)
#define TracyCSecureFreeS(x,y)
#define TracyCMessageS(x,y,z) #define TracyCMessageS(x,y,z)
#define TracyCMessageLS(x,y) #define TracyCMessageLS(x,y)
@ -81,8 +90,9 @@ struct ___tracy_c_zone_context
// This struct, as visible to user, is immutable, so treat it as if const was declared here. // This struct, as visible to user, is immutable, so treat it as if const was declared here.
typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx; typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx;
TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, const char* function ); TRACY_API void ___tracy_init_thread(void);
TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz ); TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz );
TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz );
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active ); TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active );
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active ); TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active );
@ -112,10 +122,10 @@ TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value );
#define TracyCZoneValue( ctx, value ) ___tracy_emit_zone_value( ctx, value ); #define TracyCZoneValue( ctx, value ) ___tracy_emit_zone_value( ctx, value );
TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size ); TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure );
TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth ); TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure );
TRACY_API void ___tracy_emit_memory_free( const void* ptr ); TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure );
TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth ); TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure );
TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack ); TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack );
TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack ); TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack );
@ -123,16 +133,20 @@ TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t co
TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack ); TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack );
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK #if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK ) # define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 0 )
# define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK ) # define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 0 )
# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 1 )
# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 1 )
# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK ); # define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK );
# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK ); # define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK );
# define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK ); # define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK );
# define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK ); # define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK );
#else #else
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size ); # define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 0 );
# define TracyCFree( ptr ) ___tracy_emit_memory_free( ptr ); # define TracyCFree( ptr ) ___tracy_emit_memory_free( ptr, 0 );
# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 1 );
# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free( ptr, 1 );
# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, 0 ); # define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, 0 );
# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, 0 ); # define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, 0 );
@ -166,8 +180,10 @@ TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size );
# define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active ); # define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active ); # define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth ) # define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 0 )
# define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth ) # define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 0 )
# define TracyCSecureAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 1 )
# define TracyCSecureFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 1 )
# define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth ); # define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth );
# define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth ); # define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth );
@ -181,6 +197,8 @@ TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size );
# define TracyCAllocS( ptr, size, depth ) TracyCAlloc( ptr, size ) # define TracyCAllocS( ptr, size, depth ) TracyCAlloc( ptr, size )
# define TracyCFreeS( ptr, depth ) TracyCFree( ptr ) # define TracyCFreeS( ptr, depth ) TracyCFree( ptr )
# define TracyCSecureAllocS( ptr, size, depth ) TracyCSecureAlloc( ptr, size )
# define TracyCSecureFreeS( ptr, depth ) TracyCSecureFree( ptr )
# define TracyCMessageS( txt, size, depth ) TracyCMessage( txt, size ) # define TracyCMessageS( txt, size, depth ) TracyCMessage( txt, size )
# define TracyCMessageLS( txt, depth ) TracyCMessageL( txt ) # define TracyCMessageLS( txt, depth ) TracyCMessageL( txt )

View File

@ -15,6 +15,10 @@
#ifdef TRACY_ENABLE #ifdef TRACY_ENABLE
// MSVC only: save the current warning state and drop to warning level 0 for
// the sources included below. The matching `#pragma warning(pop)` sits in the
// _MSC_VER block near the end of this file.
#ifdef _MSC_VER
# pragma warning(push, 0)
#endif
#include "common/tracy_lz4.cpp" #include "common/tracy_lz4.cpp"
#include "client/TracyProfiler.cpp" #include "client/TracyProfiler.cpp"
#include "client/TracyCallstack.cpp" #include "client/TracyCallstack.cpp"
@ -42,6 +46,7 @@
#ifdef _MSC_VER #ifdef _MSC_VER
# pragma comment(lib, "ws2_32.lib") # pragma comment(lib, "ws2_32.lib")
# pragma comment(lib, "dbghelp.lib") # pragma comment(lib, "dbghelp.lib")
# pragma warning(pop)
#endif #endif
#endif #endif

View File

@ -52,21 +52,21 @@ public:
#define TracyGpuContext tracy::InitRPMallocThread(); tracy::GetGpuCtx().ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); new(tracy::GetGpuCtx().ptr) tracy::GpuCtx; #define TracyGpuContext tracy::InitRPMallocThread(); tracy::GetGpuCtx().ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); new(tracy::GetGpuCtx().ptr) tracy::GpuCtx;
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK #if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyGpuNamedZone( varname, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active ); # define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyGpuNamedZoneC( varname, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active ); # define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyGpuZone( name ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, TRACY_CALLSTACK, true ) # define TracyGpuZone( name ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, TRACY_CALLSTACK, true )
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true ) # define TracyGpuZoneC( name, color ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true )
#else #else
# define TracyGpuNamedZone( varname, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active ); # define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
# define TracyGpuNamedZoneC( varname, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active ); # define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
# define TracyGpuZone( name ) TracyGpuNamedZone( ___tracy_gpu_zone, name, true ) # define TracyGpuZone( name ) TracyGpuNamedZone( ___tracy_gpu_zone, name, true )
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color, true ) # define TracyGpuZoneC( name, color ) TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color, true )
#endif #endif
#define TracyGpuCollect tracy::GetGpuCtx().ptr->Collect(); #define TracyGpuCollect tracy::GetGpuCtx().ptr->Collect();
#ifdef TRACY_HAS_CALLSTACK #ifdef TRACY_HAS_CALLSTACK
# define TracyGpuNamedZoneS( varname, name, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active ); # define TracyGpuNamedZoneS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active ); # define TracyGpuNamedZoneCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
# define TracyGpuZoneS( name, depth ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth, true ) # define TracyGpuZoneS( name, depth ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth, true )
# define TracyGpuZoneCS( name, color, depth ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth, true ) # define TracyGpuZoneCS( name, color, depth ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth, true )
#else #else
@ -110,7 +110,7 @@ public:
MemWrite( &item->gpuNewContext.thread, thread ); MemWrite( &item->gpuNewContext.thread, thread );
MemWrite( &item->gpuNewContext.period, period ); MemWrite( &item->gpuNewContext.period, period );
MemWrite( &item->gpuNewContext.context, m_context ); MemWrite( &item->gpuNewContext.context, m_context );
MemWrite( &item->gpuNewContext.accuracyBits, (uint8_t)bits ); MemWrite( &item->gpuNewContext.flags, uint8_t( 0 ) );
MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl ); MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl );
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
@ -215,6 +215,8 @@ public:
const auto queryId = GetGpuCtx().ptr->NextQueryId(); const auto queryId = GetGpuCtx().ptr->NextQueryId();
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP ); glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
GetProfiler().SendCallstack( depth );
const auto thread = GetThreadHandle(); const auto thread = GetThreadHandle();
TracyLfqPrepare( QueueType::GpuZoneBeginCallstack ); TracyLfqPrepare( QueueType::GpuZoneBeginCallstack );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
@ -223,8 +225,6 @@ public:
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() ); MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
TracyLfqCommit; TracyLfqCommit;
GetProfiler().SendCallstack( depth );
} }
tracy_force_inline ~GpuCtxScope() tracy_force_inline ~GpuCtxScope()

View File

@ -4,6 +4,7 @@
#if !defined TRACY_ENABLE #if !defined TRACY_ENABLE
#define TracyVkContext(x,y,z,w) nullptr #define TracyVkContext(x,y,z,w) nullptr
#define TracyVkContextCalibrated(x,y,z,w,a,b) nullptr
#define TracyVkDestroy(x) #define TracyVkDestroy(x)
#define TracyVkNamedZone(c,x,y,z,w) #define TracyVkNamedZone(c,x,y,z,w)
#define TracyVkNamedZoneC(c,x,y,z,w,a) #define TracyVkNamedZoneC(c,x,y,z,w,a)
@ -19,10 +20,9 @@
namespace tracy namespace tracy
{ {
class VkCtxScope {}; class VkCtxScope {};
class VkCtx;
} }
using TracyVkCtx = tracy::VkCtx*; using TracyVkCtx = void*;
#else #else
@ -43,16 +43,36 @@ class VkCtx
enum { QueryCount = 64 * 1024 }; enum { QueryCount = 64 * 1024 };
public: public:
VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf ) VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT _vkGetCalibratedTimestampsEXT )
: m_device( device ) : m_device( device )
, m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT )
, m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) ) , m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
, m_head( 0 ) , m_head( 0 )
, m_tail( 0 ) , m_tail( 0 )
, m_oldCnt( 0 ) , m_oldCnt( 0 )
, m_queryCount( QueryCount ) , m_queryCount( QueryCount )
, m_vkGetCalibratedTimestampsEXT( _vkGetCalibratedTimestampsEXT )
{ {
assert( m_context != 255 ); assert( m_context != 255 );
if( _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT && _vkGetCalibratedTimestampsEXT )
{
uint32_t num;
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, nullptr );
if( num > 4 ) num = 4;
VkTimeDomainEXT data[4];
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, data );
for( uint32_t i=0; i<num; i++ )
{
// TODO VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT
if( data[i] == VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT )
{
m_timeDomain = data[i];
break;
}
}
}
VkPhysicalDeviceProperties prop; VkPhysicalDeviceProperties prop;
vkGetPhysicalDeviceProperties( physdev, &prop ); vkGetPhysicalDeviceProperties( physdev, &prop );
const float period = prop.limits.timestampPeriod; const float period = prop.limits.timestampPeriod;
@ -82,21 +102,56 @@ public:
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ); vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue ); vkQueueWaitIdle( queue );
vkBeginCommandBuffer( cmdbuf, &beginInfo ); int64_t tcpu, tgpu;
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 ); if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT )
vkEndCommandBuffer( cmdbuf ); {
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ); vkBeginCommandBuffer( cmdbuf, &beginInfo );
vkQueueWaitIdle( queue ); vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 );
vkEndCommandBuffer( cmdbuf );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue );
int64_t tcpu = Profiler::GetTime(); tcpu = Profiler::GetTime();
int64_t tgpu; vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT );
vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT );
vkBeginCommandBuffer( cmdbuf, &beginInfo ); vkBeginCommandBuffer( cmdbuf, &beginInfo );
vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 ); vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 );
vkEndCommandBuffer( cmdbuf ); vkEndCommandBuffer( cmdbuf );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE ); vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue ); vkQueueWaitIdle( queue );
}
else
{
enum { NumProbes = 32 };
VkCalibratedTimestampInfoEXT spec[2] = {
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
};
uint64_t ts[2];
uint64_t deviation[NumProbes];
for( int i=0; i<NumProbes; i++ )
{
_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, deviation+i );
}
uint64_t minDeviation = deviation[0];
for( int i=1; i<NumProbes; i++ )
{
if( minDeviation > deviation[i] )
{
minDeviation = deviation[i];
}
}
m_deviation = minDeviation * 3 / 2;
m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() );
Calibrate( device, m_prevCalibration, tgpu );
tcpu = Profiler::GetTime();
}
uint8_t flags = 0;
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration;
auto item = Profiler::QueueSerial(); auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuNewContext ); MemWrite( &item->hdr.type, QueueType::GpuNewContext );
@ -105,7 +160,7 @@ public:
memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) ); memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
MemWrite( &item->gpuNewContext.period, period ); MemWrite( &item->gpuNewContext.period, period );
MemWrite( &item->gpuNewContext.context, m_context ); MemWrite( &item->gpuNewContext.context, m_context );
MemWrite( &item->gpuNewContext.accuracyBits, uint8_t( 0 ) ); MemWrite( &item->gpuNewContext.flags, flags );
MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan ); MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan );
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
@ -133,6 +188,8 @@ public:
{ {
vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount ); vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount );
m_head = m_tail = 0; m_head = m_tail = 0;
int64_t tgpu;
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu );
return; return;
} }
#endif #endif
@ -164,6 +221,25 @@ public:
Profiler::QueueSerialFinish(); Profiler::QueueSerialFinish();
} }
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT )
{
int64_t tgpu, tcpu;
Calibrate( m_device, tcpu, tgpu );
const auto refCpu = Profiler::GetTime();
const auto delta = tcpu - m_prevCalibration;
if( delta > 0 )
{
m_prevCalibration = tcpu;
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuCalibration );
MemWrite( &item->gpuCalibration.gpuTime, tgpu );
MemWrite( &item->gpuCalibration.cpuTime, refCpu );
MemWrite( &item->gpuCalibration.cpuDelta, delta );
MemWrite( &item->gpuCalibration.context, m_context );
Profiler::QueueSerialFinish();
}
}
vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt ); vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt );
m_tail += cnt; m_tail += cnt;
@ -184,8 +260,35 @@ private:
return m_context; return m_context;
} }
// Samples the device clock and the host clock of m_timeDomain as one matched pair,
// using the entry point stored in m_vkGetCalibratedTimestampsEXT.
//   device - logical device the timestamps are queried from
//   tCpu   - receives the host-domain timestamp multiplied by m_qpcToNs
//   tGpu   - receives the raw device-domain timestamp
// Outputs are only written on _WIN32 / __CYGWIN__ builds; other builds hit assert( false ).
// Must not be called while the context uses VK_TIME_DOMAIN_DEVICE_EXT.
tracy_force_inline void Calibrate( VkDevice device, int64_t& tCpu, int64_t& tGpu )
{
    assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT );

    // Slot 0 asks for the device domain, slot 1 for the host domain chosen for this context.
    VkCalibratedTimestampInfoEXT domains[2] = {
        { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
        { VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
    };
    uint64_t stamps[2];
    uint64_t maxDeviation;

    // Re-sample until the reported deviation no longer exceeds m_deviation.
    for(;;)
    {
        m_vkGetCalibratedTimestampsEXT( device, 2, domains, stamps, &maxDeviation );
        if( maxDeviation <= m_deviation ) break;
    }

#if defined _WIN32 || defined __CYGWIN__
    tGpu = stamps[0];
    tCpu = stamps[1] * m_qpcToNs;
#else
    // No host-clock conversion is provided for other platforms.
    assert( false );
#endif
}
VkDevice m_device; VkDevice m_device;
VkQueryPool m_query; VkQueryPool m_query;
VkTimeDomainEXT m_timeDomain;
uint64_t m_deviation;
int64_t m_qpcToNs;
int64_t m_prevCalibration;
uint8_t m_context; uint8_t m_context;
unsigned int m_head; unsigned int m_head;
@ -194,6 +297,8 @@ private:
unsigned int m_queryCount; unsigned int m_queryCount;
int64_t* m_res; int64_t* m_res;
PFN_vkGetCalibratedTimestampsEXT m_vkGetCalibratedTimestampsEXT;
}; };
class VkCtxScope class VkCtxScope
@ -237,6 +342,8 @@ public:
const auto queryId = ctx->NextQueryId(); const auto queryId = ctx->NextQueryId();
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId ); vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId );
GetProfiler().SendCallstack( depth );
auto item = Profiler::QueueSerial(); auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial ); MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() ); MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
@ -245,8 +352,6 @@ public:
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) ); MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() ); MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
Profiler::QueueSerialFinish(); Profiler::QueueSerialFinish();
GetProfiler().SendCallstack( depth );
} }
tracy_force_inline ~VkCtxScope() tracy_force_inline ~VkCtxScope()
@ -272,11 +377,11 @@ private:
VkCtx* m_ctx; VkCtx* m_ctx;
}; };
static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf ) static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct )
{ {
InitRPMallocThread(); InitRPMallocThread();
auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) ); auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) );
new(ctx) VkCtx( physdev, device, queue, cmdbuf ); new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct );
return ctx; return ctx;
} }
@ -290,24 +395,25 @@ static inline void DestroyVkContext( VkCtx* ctx )
using TracyVkCtx = tracy::VkCtx*; using TracyVkCtx = tracy::VkCtx*;
#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf ); #define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, nullptr, nullptr );
#define TracyVkContextCalibrated( physdev, device, queue, cmdbuf, gpdctd, gct ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, gpdctd, gct );
#define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx ); #define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx );
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK #if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active ); # define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active );
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active ); # define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active );
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, TRACY_CALLSTACK, true ) # define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, TRACY_CALLSTACK, true )
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, TRACY_CALLSTACK, true ) # define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, TRACY_CALLSTACK, true )
#else #else
# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, active ); # define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, active );
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, active ); # define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, active );
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZone( ctx, ___tracy_gpu_zone, cmdbuf, name, true ) # define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZone( ctx, ___tracy_gpu_zone, cmdbuf, name, true )
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneC( ctx, ___tracy_gpu_zone, cmdbuf, name, color, true ) # define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneC( ctx, ___tracy_gpu_zone, cmdbuf, name, color, true )
#endif #endif
#define TracyVkCollect( ctx, cmdbuf ) ctx->Collect( cmdbuf ); #define TracyVkCollect( ctx, cmdbuf ) ctx->Collect( cmdbuf );
#ifdef TRACY_HAS_CALLSTACK #ifdef TRACY_HAS_CALLSTACK
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth, active ); # define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth, active );
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth, active ); # define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth, active );
# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, depth, true ) # define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, depth, true )
# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, depth, true ) # define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, depth, true )
#else #else

View File

@ -1,10 +1,8 @@
#ifdef _MSC_VER
# pragma warning(disable:4996)
#endif
namespace tracy namespace tracy
{ {
#if defined __linux__ && defined __ARM_ARCH
static const char* DecodeArmImplementer( uint32_t v ) static const char* DecodeArmImplementer( uint32_t v )
{ {
static char buf[16]; static char buf[16];
@ -16,6 +14,7 @@ static const char* DecodeArmImplementer( uint32_t v )
case 0x44: return "DEC"; case 0x44: return "DEC";
case 0x46: return "Fujitsu"; case 0x46: return "Fujitsu";
case 0x48: return "HiSilicon"; case 0x48: return "HiSilicon";
case 0x49: return "Infineon";
case 0x4d: return "Motorola"; case 0x4d: return "Motorola";
case 0x4e: return "Nvidia"; case 0x4e: return "Nvidia";
case 0x50: return "Applied Micro"; case 0x50: return "Applied Micro";
@ -27,6 +26,7 @@ static const char* DecodeArmImplementer( uint32_t v )
case 0x66: return "Faraday"; case 0x66: return "Faraday";
case 0x68: return "HXT"; case 0x68: return "HXT";
case 0x69: return "Intel"; case 0x69: return "Intel";
case 0xc0: return "Ampere Computing";
default: break; default: break;
} }
sprintf( buf, "0x%x", v ); sprintf( buf, "0x%x", v );
@ -75,6 +75,7 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
case 0xc60: return " Cortex-M0+"; case 0xc60: return " Cortex-M0+";
case 0xd00: return " AArch64 simulator"; case 0xd00: return " AArch64 simulator";
case 0xd01: return " Cortex-A32"; case 0xd01: return " Cortex-A32";
case 0xd02: return " Cortex-A34";
case 0xd03: return " Cortex-A53"; case 0xd03: return " Cortex-A53";
case 0xd04: return " Cortex-A35"; case 0xd04: return " Cortex-A35";
case 0xd05: return " Cortex-A55"; case 0xd05: return " Cortex-A55";
@ -91,6 +92,10 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
case 0xd13: return " Cortex-R52"; case 0xd13: return " Cortex-R52";
case 0xd20: return " Cortex-M23"; case 0xd20: return " Cortex-M23";
case 0xd21: return " Cortex-M33"; case 0xd21: return " Cortex-M33";
case 0xd40: return " Zeus";
case 0xd41: return " Cortex-A78";
case 0xd43: return " Cortex-A65AE";
case 0xd44: return " Cortex-X1";
case 0xd4a: return " Neoverse E1"; case 0xd4a: return " Neoverse E1";
default: break; default: break;
} }
@ -110,6 +115,13 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
case 0xa2: return " ThunderX 81XX"; case 0xa2: return " ThunderX 81XX";
case 0xa3: return " ThunderX 83XX"; case 0xa3: return " ThunderX 83XX";
case 0xaf: return " ThunderX2 99xx"; case 0xaf: return " ThunderX2 99xx";
case 0xb0: return " OcteonTX2";
case 0xb1: return " OcteonTX2 T98";
case 0xb2: return " OcteonTX2 T96";
case 0xb3: return " OcteonTX2 F95";
case 0xb4: return " OcteonTX2 F95N";
case 0xb5: return " OcteonTX2 F95MM";
case 0xb8: return " ThunderX3 T110";
default: break; default: break;
} }
case 0x44: case 0x44:
@ -212,6 +224,8 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
return buf; return buf;
} }
#elif defined __APPLE__ && TARGET_OS_IPHONE == 1
static const char* DecodeIosDevice( const char* id ) static const char* DecodeIosDevice( const char* id )
{ {
static const char* DeviceTable[] = { static const char* DeviceTable[] = {
@ -252,6 +266,7 @@ static const char* DecodeIosDevice( const char* id )
"iPhone12,1", "iPhone 11", "iPhone12,1", "iPhone 11",
"iPhone12,3", "iPhone 11 Pro", "iPhone12,3", "iPhone 11 Pro",
"iPhone12,5", "iPhone 11 Pro Max", "iPhone12,5", "iPhone 11 Pro Max",
"iPhone12,8", "iPhone SE 2nd Gen",
"iPad1,1", "iPad (A1219/A1337)", "iPad1,1", "iPad (A1219/A1337)",
"iPad2,1", "iPad 2 (A1395)", "iPad2,1", "iPad 2 (A1395)",
"iPad2,2", "iPad 2 (A1396)", "iPad2,2", "iPad 2 (A1396)",
@ -302,6 +317,10 @@ static const char* DecodeIosDevice( const char* id )
"iPad8,6", "iPad Pro 12.9\" 3rd gen (A1876)", "iPad8,6", "iPad Pro 12.9\" 3rd gen (A1876)",
"iPad8,7", "iPad Pro 12.9\" 3rd gen (A1895/A1983/A2014)", "iPad8,7", "iPad Pro 12.9\" 3rd gen (A1895/A1983/A2014)",
"iPad8,8", "iPad Pro 12.9\" 3rd gen (A1895/A1983/A2014)", "iPad8,8", "iPad Pro 12.9\" 3rd gen (A1895/A1983/A2014)",
"iPad8,9", "iPad Pro 11\" 2nd gen (Wifi)",
"iPad8,10", "iPad Pro 11\" 2nd gen (Wifi+Cellular)",
"iPad8,11", "iPad Pro 12.9\" 4th gen (Wifi)",
"iPad8,12", "iPad Pro 12.9\" 4th gen (Wifi+Cellular)",
"iPad11,1", "iPad Mini 5th gen (A2133)", "iPad11,1", "iPad Mini 5th gen (A2133)",
"iPad11,2", "iPad Mini 5th gen (A2124/A2125/A2126)", "iPad11,2", "iPad Mini 5th gen (A2124/A2125/A2126)",
"iPad11,3", "iPad Air 3rd gen (A2152)", "iPad11,3", "iPad Air 3rd gen (A2152)",
@ -325,4 +344,6 @@ static const char* DecodeIosDevice( const char* id )
return id; return id;
} }
#endif
} }

View File

@ -222,9 +222,9 @@ static const char* GetModuleName( uint64_t addr )
return "[unknown]"; return "[unknown]";
} }
SymbolData DecodeSymbolAddress( uint64_t ptr ) CallstackSymbolData DecodeSymbolAddress( uint64_t ptr )
{ {
SymbolData sym; CallstackSymbolData sym;
IMAGEHLP_LINE64 line; IMAGEHLP_LINE64 line;
DWORD displacement = 0; DWORD displacement = 0;
line.SizeOfStruct = sizeof(IMAGEHLP_LINE64); line.SizeOfStruct = sizeof(IMAGEHLP_LINE64);
@ -242,9 +242,9 @@ SymbolData DecodeSymbolAddress( uint64_t ptr )
return sym; return sym;
} }
SymbolData DecodeCodeAddress( uint64_t ptr ) CallstackSymbolData DecodeCodeAddress( uint64_t ptr )
{ {
SymbolData sym; CallstackSymbolData sym;
const auto proc = GetCurrentProcess(); const auto proc = GetCurrentProcess();
bool done = false; bool done = false;
@ -442,14 +442,10 @@ const char* DecodeCallstackPtrFast( uint64_t ptr )
static int SymbolAddressDataCb( void* data, uintptr_t pc, uintptr_t lowaddr, const char* fn, int lineno, const char* function ) static int SymbolAddressDataCb( void* data, uintptr_t pc, uintptr_t lowaddr, const char* fn, int lineno, const char* function )
{ {
auto& sym = *(SymbolData*)data; auto& sym = *(CallstackSymbolData*)data;
if( !fn ) if( !fn )
{ {
const char* symloc = nullptr; sym.file = "[unknown]";
Dl_info dlinfo;
if( dladdr( (void*)pc, &dlinfo ) ) symloc = dlinfo.dli_fname;
if( !symloc ) symloc = "[unknown]";
sym.file = symloc;
sym.line = 0; sym.line = 0;
sym.needFree = false; sym.needFree = false;
} }
@ -465,20 +461,20 @@ static int SymbolAddressDataCb( void* data, uintptr_t pc, uintptr_t lowaddr, con
static void SymbolAddressErrorCb( void* data, const char* /*msg*/, int /*errnum*/ ) static void SymbolAddressErrorCb( void* data, const char* /*msg*/, int /*errnum*/ )
{ {
auto& sym = *(SymbolData*)data; auto& sym = *(CallstackSymbolData*)data;
sym.file = "[unknown]"; sym.file = "[unknown]";
sym.line = 0; sym.line = 0;
sym.needFree = false; sym.needFree = false;
} }
SymbolData DecodeSymbolAddress( uint64_t ptr ) CallstackSymbolData DecodeSymbolAddress( uint64_t ptr )
{ {
SymbolData sym; CallstackSymbolData sym;
backtrace_pcinfo( cb_bts, ptr, SymbolAddressDataCb, SymbolAddressErrorCb, &sym ); backtrace_pcinfo( cb_bts, ptr, SymbolAddressDataCb, SymbolAddressErrorCb, &sym );
return sym; return sym;
} }
SymbolData DecodeCodeAddress( uint64_t ptr ) CallstackSymbolData DecodeCodeAddress( uint64_t ptr )
{ {
return DecodeSymbolAddress( ptr ); return DecodeSymbolAddress( ptr );
} }
@ -494,14 +490,12 @@ static int CallstackDataCb( void* /*data*/, uintptr_t pc, uintptr_t lowaddr, con
if( !fn && !function ) if( !fn && !function )
{ {
const char* symname = nullptr; const char* symname = nullptr;
const char* symloc = nullptr;
auto vptr = (void*)pc; auto vptr = (void*)pc;
ptrdiff_t symoff = 0; ptrdiff_t symoff = 0;
Dl_info dlinfo; Dl_info dlinfo;
if( dladdr( vptr, &dlinfo ) ) if( dladdr( vptr, &dlinfo ) )
{ {
symloc = dlinfo.dli_fname;
symname = dlinfo.dli_sname; symname = dlinfo.dli_sname;
symoff = (char*)pc - (char*)dlinfo.dli_saddr; symoff = (char*)pc - (char*)dlinfo.dli_saddr;
@ -518,7 +512,6 @@ static int CallstackDataCb( void* /*data*/, uintptr_t pc, uintptr_t lowaddr, con
} }
if( !symname ) symname = "[unknown]"; if( !symname ) symname = "[unknown]";
if( !symloc ) symloc = "[unknown]";
if( symoff == 0 ) if( symoff == 0 )
{ {
@ -536,15 +529,7 @@ static int CallstackDataCb( void* /*data*/, uintptr_t pc, uintptr_t lowaddr, con
cb_data[cb_num].name = name; cb_data[cb_num].name = name;
} }
char buf[32]; cb_data[cb_num].file = CopyString( "[unknown]" );
const auto addrlen = sprintf( buf, " [%p]", (void*)pc );
const auto loclen = strlen( symloc );
auto loc = (char*)tracy_malloc( loclen + addrlen + 1 );
memcpy( loc, symloc, loclen );
memcpy( loc + loclen, buf, addrlen );
loc[loclen + addrlen] = '\0';
cb_data[cb_num].file = loc;
cb_data[cb_num].line = 0; cb_data[cb_num].line = 0;
} }
else else
@ -652,16 +637,16 @@ const char* DecodeCallstackPtrFast( uint64_t ptr )
return ret; return ret;
} }
SymbolData DecodeSymbolAddress( uint64_t ptr ) CallstackSymbolData DecodeSymbolAddress( uint64_t ptr )
{ {
const char* symloc = nullptr; const char* symloc = nullptr;
Dl_info dlinfo; Dl_info dlinfo;
if( dladdr( (void*)ptr, &dlinfo ) ) symloc = dlinfo.dli_fname; if( dladdr( (void*)ptr, &dlinfo ) ) symloc = dlinfo.dli_fname;
if( !symloc ) symloc = "[unknown]"; if( !symloc ) symloc = "[unknown]";
return SymbolData { symloc, 0, false }; return CallstackSymbolData { symloc, 0, false };
} }
SymbolData DecodeCodeAddress( uint64_t ptr ) CallstackSymbolData DecodeCodeAddress( uint64_t ptr )
{ {
return DecodeSymbolAddress( ptr ); return DecodeSymbolAddress( ptr );
} }
@ -717,15 +702,7 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
cb.name = name; cb.name = name;
} }
char buf[32]; cb.file = CopyString( "[unknown]" );
const auto addrlen = sprintf( buf, " [%p]", (void*)ptr );
const auto loclen = strlen( symloc );
auto loc = (char*)tracy_malloc( loclen + addrlen + 1 );
memcpy( loc, symloc, loclen );
memcpy( loc + loclen, buf, addrlen );
loc[loclen + addrlen] = '\0';
cb.file = loc;
cb.symLen = 0; cb.symLen = 0;
cb.symAddr = (uint64_t)symaddr; cb.symAddr = (uint64_t)symaddr;

View File

@ -22,7 +22,7 @@
namespace tracy namespace tracy
{ {
struct SymbolData struct CallstackSymbolData
{ {
const char* file; const char* file;
uint32_t line; uint32_t line;
@ -45,8 +45,8 @@ struct CallstackEntryData
const char* imageName; const char* imageName;
}; };
SymbolData DecodeSymbolAddress( uint64_t ptr ); CallstackSymbolData DecodeSymbolAddress( uint64_t ptr );
SymbolData DecodeCodeAddress( uint64_t ptr ); CallstackSymbolData DecodeCodeAddress( uint64_t ptr );
const char* DecodeCallstackPtrFast( uint64_t ptr ); const char* DecodeCallstackPtrFast( uint64_t ptr );
CallstackEntryData DecodeCallstackPtr( uint64_t ptr ); CallstackEntryData DecodeCallstackPtr( uint64_t ptr );
void InitCallstack(); void InitCallstack();

View File

@ -172,6 +172,12 @@ static tracy_force_inline uint64_t ProcessRGB( const uint8_t* src )
return uint64_t( to565( src[0], src[1], src[2] ) ) << 16; return uint64_t( to565( src[0], src[1], src[2] ) ) << 16;
} }
__m128i amask = _mm_set1_epi32( 0xFFFFFF );
px0 = _mm_and_si128( px0, amask );
px1 = _mm_and_si128( px1, amask );
px2 = _mm_and_si128( px2, amask );
px3 = _mm_and_si128( px3, amask );
__m128i min0 = _mm_min_epu8( px0, px1 ); __m128i min0 = _mm_min_epu8( px0, px1 );
__m128i min1 = _mm_min_epu8( px2, px3 ); __m128i min1 = _mm_min_epu8( px2, px3 );
__m128i min2 = _mm_min_epu8( min0, min1 ); __m128i min2 = _mm_min_epu8( min0, min1 );
@ -492,6 +498,12 @@ static tracy_force_inline void ProcessRGB_AVX( const uint8_t* src, char*& dst )
return; return;
} }
__m256i amask = _mm256_set1_epi32( 0xFFFFFF );
px0 = _mm256_and_si256( px0, amask );
px1 = _mm256_and_si256( px1, amask );
px2 = _mm256_and_si256( px2, amask );
px3 = _mm256_and_si256( px3, amask );
__m256i min0 = _mm256_min_epu8( px0, px1 ); __m256i min0 = _mm256_min_epu8( px0, px1 );
__m256i min1 = _mm256_min_epu8( px2, px3 ); __m256i min1 = _mm256_min_epu8( px2, px3 );
__m256i min2 = _mm256_min_epu8( min0, min1 ); __m256i min2 = _mm256_min_epu8( min0, min1 );

View File

@ -23,7 +23,8 @@ public:
{ {
assert( m_id != std::numeric_limits<uint32_t>::max() ); assert( m_id != std::numeric_limits<uint32_t>::max() );
TracyLfqPrepare( QueueType::LockAnnounce ); auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockAnnounce );
MemWrite( &item->lockAnnounce.id, m_id ); MemWrite( &item->lockAnnounce.id, m_id );
MemWrite( &item->lockAnnounce.time, Profiler::GetTime() ); MemWrite( &item->lockAnnounce.time, Profiler::GetTime() );
MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc ); MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc );
@ -31,7 +32,7 @@ public:
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item ); GetProfiler().DeferItem( *item );
#endif #endif
TracyLfqCommit; Profiler::QueueSerialFinish();
} }
LockableCtx( const LockableCtx& ) = delete; LockableCtx( const LockableCtx& ) = delete;
@ -39,14 +40,14 @@ public:
tracy_force_inline ~LockableCtx() tracy_force_inline ~LockableCtx()
{ {
TracyLfqPrepare( QueueType::LockTerminate ); auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockTerminate );
MemWrite( &item->lockTerminate.id, m_id ); MemWrite( &item->lockTerminate.id, m_id );
MemWrite( &item->lockTerminate.time, Profiler::GetTime() ); MemWrite( &item->lockTerminate.time, Profiler::GetTime() );
MemWrite( &item->lockTerminate.type, LockType::Lockable );
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item ); GetProfiler().DeferItem( *item );
#endif #endif
TracyLfqCommit; Profiler::QueueSerialFinish();
} }
tracy_force_inline bool BeforeLock() tracy_force_inline bool BeforeLock()
@ -69,7 +70,6 @@ public:
MemWrite( &item->lockWait.thread, GetThreadHandle() ); MemWrite( &item->lockWait.thread, GetThreadHandle() );
MemWrite( &item->lockWait.id, m_id ); MemWrite( &item->lockWait.id, m_id );
MemWrite( &item->lockWait.time, Profiler::GetTime() ); MemWrite( &item->lockWait.time, Profiler::GetTime() );
MemWrite( &item->lockWait.type, LockType::Lockable );
Profiler::QueueSerialFinish(); Profiler::QueueSerialFinish();
return true; return true;
} }
@ -155,16 +155,18 @@ public:
tracy_force_inline void CustomName( const char* name, size_t size ) tracy_force_inline void CustomName( const char* name, size_t size )
{ {
auto ptr = (char*)tracy_malloc( size+1 ); assert( size < std::numeric_limits<uint16_t>::max() );
auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, name, size ); memcpy( ptr, name, size );
ptr[size] = '\0'; auto item = Profiler::QueueSerial();
TracyLfqPrepare( QueueType::LockName ); MemWrite( &item->hdr.type, QueueType::LockName );
MemWrite( &item->lockName.id, m_id ); MemWrite( &item->lockNameFat.id, m_id );
MemWrite( &item->lockName.name, (uint64_t)ptr ); MemWrite( &item->lockNameFat.name, (uint64_t)ptr );
MemWrite( &item->lockNameFat.size, (uint16_t)size );
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item ); GetProfiler().DeferItem( *item );
#endif #endif
TracyLfqCommit; Profiler::QueueSerialFinish();
} }
private: private:
@ -236,17 +238,16 @@ public:
{ {
assert( m_id != std::numeric_limits<uint32_t>::max() ); assert( m_id != std::numeric_limits<uint32_t>::max() );
TracyLfqPrepare( QueueType::LockAnnounce ); auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockAnnounce );
MemWrite( &item->lockAnnounce.id, m_id ); MemWrite( &item->lockAnnounce.id, m_id );
MemWrite( &item->lockAnnounce.time, Profiler::GetTime() ); MemWrite( &item->lockAnnounce.time, Profiler::GetTime() );
MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc ); MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc );
MemWrite( &item->lockAnnounce.type, LockType::SharedLockable ); MemWrite( &item->lockAnnounce.type, LockType::SharedLockable );
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item ); GetProfiler().DeferItem( *item );
#endif #endif
Profiler::QueueSerialFinish();
TracyLfqCommit;
} }
SharedLockableCtx( const SharedLockableCtx& ) = delete; SharedLockableCtx( const SharedLockableCtx& ) = delete;
@ -254,16 +255,14 @@ public:
tracy_force_inline ~SharedLockableCtx() tracy_force_inline ~SharedLockableCtx()
{ {
TracyLfqPrepare( QueueType::LockTerminate ); auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockTerminate );
MemWrite( &item->lockTerminate.id, m_id ); MemWrite( &item->lockTerminate.id, m_id );
MemWrite( &item->lockTerminate.time, Profiler::GetTime() ); MemWrite( &item->lockTerminate.time, Profiler::GetTime() );
MemWrite( &item->lockTerminate.type, LockType::SharedLockable );
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item ); GetProfiler().DeferItem( *item );
#endif #endif
Profiler::QueueSerialFinish();
TracyLfqCommit;
} }
tracy_force_inline bool BeforeLock() tracy_force_inline bool BeforeLock()
@ -286,7 +285,6 @@ public:
MemWrite( &item->lockWait.thread, GetThreadHandle() ); MemWrite( &item->lockWait.thread, GetThreadHandle() );
MemWrite( &item->lockWait.id, m_id ); MemWrite( &item->lockWait.id, m_id );
MemWrite( &item->lockWait.time, Profiler::GetTime() ); MemWrite( &item->lockWait.time, Profiler::GetTime() );
MemWrite( &item->lockWait.type, LockType::SharedLockable );
Profiler::QueueSerialFinish(); Profiler::QueueSerialFinish();
return true; return true;
} }
@ -369,7 +367,6 @@ public:
MemWrite( &item->lockWait.thread, GetThreadHandle() ); MemWrite( &item->lockWait.thread, GetThreadHandle() );
MemWrite( &item->lockWait.id, m_id ); MemWrite( &item->lockWait.id, m_id );
MemWrite( &item->lockWait.time, Profiler::GetTime() ); MemWrite( &item->lockWait.time, Profiler::GetTime() );
MemWrite( &item->lockWait.type, LockType::SharedLockable );
Profiler::QueueSerialFinish(); Profiler::QueueSerialFinish();
return true; return true;
} }
@ -455,16 +452,18 @@ public:
tracy_force_inline void CustomName( const char* name, size_t size ) tracy_force_inline void CustomName( const char* name, size_t size )
{ {
auto ptr = (char*)tracy_malloc( size+1 ); assert( size < std::numeric_limits<uint16_t>::max() );
auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, name, size ); memcpy( ptr, name, size );
ptr[size] = '\0'; auto item = Profiler::QueueSerial();
TracyLfqPrepare( QueueType::LockName ); MemWrite( &item->hdr.type, QueueType::LockName );
MemWrite( &item->lockName.id, m_id ); MemWrite( &item->lockNameFat.id, m_id );
MemWrite( &item->lockName.name, (uint64_t)ptr ); MemWrite( &item->lockNameFat.name, (uint64_t)ptr );
MemWrite( &item->lockNameFat.size, (uint16_t)size );
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item ); GetProfiler().DeferItem( *item );
#endif #endif
TracyLfqCommit; Profiler::QueueSerialFinish();
} }
private: private:

View File

@ -37,6 +37,10 @@
# include <sys/sysctl.h> # include <sys/sysctl.h>
#endif #endif
#if defined __APPLE__
# include "TargetConditionals.h"
#endif
#include <algorithm> #include <algorithm>
#include <assert.h> #include <assert.h>
#include <atomic> #include <atomic>
@ -80,11 +84,6 @@
# endif # endif
#endif #endif
#if defined TRACY_HW_TIMER && __ARM_ARCH >= 6 && !defined TARGET_OS_IOS
# include <signal.h>
# include <setjmp.h>
#endif
#if defined _WIN32 || defined __CYGWIN__ #if defined _WIN32 || defined __CYGWIN__
# include <lmcons.h> # include <lmcons.h>
extern "C" typedef LONG (WINAPI *t_RtlGetVersion)( PRTL_OSVERSIONINFOW ); extern "C" typedef LONG (WINAPI *t_RtlGetVersion)( PRTL_OSVERSIONINFOW );
@ -93,9 +92,6 @@ extern "C" typedef BOOL (WINAPI *t_GetLogicalProcessorInformationEx)( LOGICAL_PR
# include <unistd.h> # include <unistd.h>
# include <limits.h> # include <limits.h>
#endif #endif
#if defined __APPLE__
# include "TargetConditionals.h"
#endif
#if defined __linux__ #if defined __linux__
# include <sys/sysinfo.h> # include <sys/sysinfo.h>
# include <sys/utsname.h> # include <sys/utsname.h>
@ -231,7 +227,11 @@ static int64_t SetupHwTimer()
const char* noCheck = getenv( "TRACY_NO_INVARIANT_CHECK" ); const char* noCheck = getenv( "TRACY_NO_INVARIANT_CHECK" );
if( !noCheck || noCheck[0] != '1' ) if( !noCheck || noCheck[0] != '1' )
{ {
#if defined _WIN32 || defined __CYGWIN__
InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*.\nAlternatively you may rebuild the application with the TRACY_TIMER_QPC define to use lower resolution timer." ); InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*.\nAlternatively you may rebuild the application with the TRACY_TIMER_QPC define to use lower resolution timer." );
#else
InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*." );
#endif
} }
} }
#endif #endif
@ -270,6 +270,7 @@ static const char* GetProcessName()
return processName; return processName;
} }
#if defined __linux__ && defined __ARM_ARCH
static uint32_t GetHex( char*& ptr, int skip ) static uint32_t GetHex( char*& ptr, int skip )
{ {
uint32_t ret; uint32_t ret;
@ -287,6 +288,7 @@ static uint32_t GetHex( char*& ptr, int skip )
ptr = end; ptr = end;
return ret; return ret;
} }
#endif
static const char* GetHostInfo() static const char* GetHostInfo()
{ {
@ -540,6 +542,8 @@ static char s_crashText[1024];
LONG WINAPI CrashFilter( PEXCEPTION_POINTERS pExp ) LONG WINAPI CrashFilter( PEXCEPTION_POINTERS pExp )
{ {
if( !GetProfiler().IsConnected() ) return EXCEPTION_CONTINUE_SEARCH;
const unsigned ec = pExp->ExceptionRecord->ExceptionCode; const unsigned ec = pExp->ExceptionRecord->ExceptionCode;
auto msgPtr = s_crashText; auto msgPtr = s_crashText;
switch( ec ) switch( ec )
@ -590,12 +594,12 @@ LONG WINAPI CrashFilter( PEXCEPTION_POINTERS pExp )
} }
{ {
GetProfiler().SendCallstack( 60, "KiUserExceptionDispatcher" );
TracyLfqPrepare( QueueType::CrashReport ); TracyLfqPrepare( QueueType::CrashReport );
item->crashReport.time = Profiler::GetTime(); item->crashReport.time = Profiler::GetTime();
item->crashReport.text = (uint64_t)s_crashText; item->crashReport.text = (uint64_t)s_crashText;
TracyLfqCommit; TracyLfqCommit;
GetProfiler().SendCallstack( 60, "KiUserExceptionDispatcher" );
} }
HANDLE h = CreateToolhelp32Snapshot( TH32CS_SNAPTHREAD, 0 ); HANDLE h = CreateToolhelp32Snapshot( TH32CS_SNAPTHREAD, 0 );
@ -822,12 +826,12 @@ static void CrashHandler( int signal, siginfo_t* info, void* /*ucontext*/ )
} }
{ {
GetProfiler().SendCallstack( 60, "__kernel_rt_sigreturn" );
TracyLfqPrepare( QueueType::CrashReport ); TracyLfqPrepare( QueueType::CrashReport );
item->crashReport.time = Profiler::GetTime(); item->crashReport.time = Profiler::GetTime();
item->crashReport.text = (uint64_t)s_crashText; item->crashReport.text = (uint64_t)s_crashText;
TracyLfqCommit; TracyLfqCommit;
GetProfiler().SendCallstack( 60, "__kernel_rt_sigreturn" );
} }
DIR* dp = opendir( "/proc/self/task" ); DIR* dp = opendir( "/proc/self/task" );
@ -863,7 +867,7 @@ static void CrashHandler( int signal, siginfo_t* info, void* /*ucontext*/ )
enum { QueuePrealloc = 256 * 1024 }; enum { QueuePrealloc = 256 * 1024 };
static Profiler* s_instance; static Profiler* s_instance = nullptr;
static Thread* s_thread; static Thread* s_thread;
static Thread* s_compressThread; static Thread* s_compressThread;
@ -871,6 +875,19 @@ static Thread* s_compressThread;
static Thread* s_sysTraceThread = nullptr; static Thread* s_sysTraceThread = nullptr;
#endif #endif
TRACY_API bool ProfilerAvailable() { return s_instance != nullptr; }
TRACY_API int64_t GetFrequencyQpc()
{
#if defined _WIN32 || defined __CYGWIN__
LARGE_INTEGER t;
QueryPerformanceFrequency( &t );
return t.QuadPart;
#else
return 0;
#endif
}
#ifdef TRACY_DELAYED_INIT #ifdef TRACY_DELAYED_INIT
struct ThreadNameData; struct ThreadNameData;
TRACY_API moodycamel::ConcurrentQueue<QueueItem>& GetQueue(); TRACY_API moodycamel::ConcurrentQueue<QueueItem>& GetQueue();
@ -911,6 +928,25 @@ struct ProfilerThreadData
# endif # endif
}; };
# ifdef TRACY_MANUAL_LIFETIME
ProfilerData* s_profilerData = nullptr;
TRACY_API void StartupProfiler()
{
s_profilerData = new ProfilerData;
s_profilerData->profiler.SpawnWorkerThreads();
}
static ProfilerData& GetProfilerData()
{
assert(s_profilerData);
return *s_profilerData;
}
TRACY_API void ShutdownProfiler()
{
delete s_profilerData;
s_profilerData = nullptr;
rpmalloc_finalize();
}
# else
static std::atomic<int> profilerDataLock { 0 }; static std::atomic<int> profilerDataLock { 0 };
static std::atomic<ProfilerData*> profilerData { nullptr }; static std::atomic<ProfilerData*> profilerData { nullptr };
@ -932,6 +968,7 @@ static ProfilerData& GetProfilerData()
} }
return *ptr; return *ptr;
} }
# endif
static ProfilerThreadData& GetProfilerThreadData() static ProfilerThreadData& GetProfilerThreadData()
{ {
@ -953,10 +990,12 @@ std::atomic<ThreadNameData*>& GetThreadNameData() { return GetProfilerData().thr
TRACY_API LuaZoneState& GetLuaZoneState() { return GetProfilerThreadData().luaZoneState; } TRACY_API LuaZoneState& GetLuaZoneState() { return GetProfilerThreadData().luaZoneState; }
# endif # endif
# ifndef TRACY_MANUAL_LIFETIME
namespace namespace
{ {
const auto& __profiler_init = GetProfiler(); const auto& __profiler_init = GetProfiler();
} }
# endif
#else #else
TRACY_API void InitRPMallocThread() TRACY_API void InitRPMallocThread()
@ -1044,8 +1083,8 @@ Profiler::Profiler()
, m_fiQueue( 16 ) , m_fiQueue( 16 )
, m_fiDequeue( 16 ) , m_fiDequeue( 16 )
, m_frameCount( 0 ) , m_frameCount( 0 )
#ifdef TRACY_ON_DEMAND
, m_isConnected( false ) , m_isConnected( false )
#ifdef TRACY_ON_DEMAND
, m_connectionId( 0 ) , m_connectionId( 0 )
, m_deferredQueue( 64*1024 ) , m_deferredQueue( 64*1024 )
#endif #endif
@ -1081,6 +1120,13 @@ Profiler::Profiler()
m_userPort = atoi( userPort ); m_userPort = atoi( userPort );
} }
#if !defined(TRACY_DELAYED_INIT) || !defined(TRACY_MANUAL_LIFETIME)
SpawnWorkerThreads();
#endif
}
void Profiler::SpawnWorkerThreads()
{
s_thread = (Thread*)tracy_malloc( sizeof( Thread ) ); s_thread = (Thread*)tracy_malloc( sizeof( Thread ) );
new(s_thread) Thread( LaunchWorker, this ); new(s_thread) Thread( LaunchWorker, this );
@ -1172,6 +1218,8 @@ void Profiler::Worker()
s_profilerTid = syscall( SYS_gettid ); s_profilerTid = syscall( SYS_gettid );
#endif #endif
ThreadExitHandler threadExitHandler;
SetThreadName( "Tracy Profiler" ); SetThreadName( "Tracy Profiler" );
#ifdef TRACY_DATA_PORT #ifdef TRACY_DATA_PORT
@ -1223,6 +1271,12 @@ void Profiler::Worker()
uint8_t cpuArch = CpuArchUnknown; uint8_t cpuArch = CpuArchUnknown;
#endif #endif
#ifdef TRACY_NO_CODE_TRANSFER
uint8_t codeTransfer = 0;
#else
uint8_t codeTransfer = 1;
#endif
#if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 #if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
uint32_t regs[4]; uint32_t regs[4];
char manufacturer[12]; char manufacturer[12];
@ -1250,6 +1304,7 @@ void Profiler::Worker()
MemWrite( &welcome.onDemand, onDemand ); MemWrite( &welcome.onDemand, onDemand );
MemWrite( &welcome.isApple, isApple ); MemWrite( &welcome.isApple, isApple );
MemWrite( &welcome.cpuArch, cpuArch ); MemWrite( &welcome.cpuArch, cpuArch );
MemWrite( &welcome.codeTransfer, codeTransfer );
memcpy( welcome.cpuManufacturer, manufacturer, 12 ); memcpy( welcome.cpuManufacturer, manufacturer, 12 );
MemWrite( &welcome.cpuId, cpuId ); MemWrite( &welcome.cpuId, cpuId );
memcpy( welcome.programName, procname, pnsz ); memcpy( welcome.programName, procname, pnsz );
@ -1318,6 +1373,11 @@ void Profiler::Worker()
#ifndef TRACY_NO_EXIT #ifndef TRACY_NO_EXIT
if( !m_noExit && ShouldExit() ) if( !m_noExit && ShouldExit() )
{ {
if( m_broadcast )
{
broadcastMsg.activeTime = -1;
m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen );
}
m_shutdownFinished.store( true, std::memory_order_relaxed ); m_shutdownFinished.store( true, std::memory_order_relaxed );
return; return;
} }
@ -1335,12 +1395,20 @@ void Profiler::Worker()
{ {
lastBroadcast = t; lastBroadcast = t;
const auto ts = std::chrono::duration_cast<std::chrono::seconds>( std::chrono::system_clock::now().time_since_epoch() ).count(); const auto ts = std::chrono::duration_cast<std::chrono::seconds>( std::chrono::system_clock::now().time_since_epoch() ).count();
broadcastMsg.activeTime = uint32_t( ts - m_epoch ); broadcastMsg.activeTime = int32_t( ts - m_epoch );
assert( broadcastMsg.activeTime >= 0 );
m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen ); m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen );
} }
} }
} }
if( m_broadcast )
{
lastBroadcast = 0;
broadcastMsg.activeTime = -1;
m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen );
}
// Handshake // Handshake
{ {
char shibboleth[HandshakeShibbolethSize]; char shibboleth[HandshakeShibbolethSize];
@ -1378,8 +1446,8 @@ void Profiler::Worker()
const auto currentTime = GetTime(); const auto currentTime = GetTime();
ClearQueues( token ); ClearQueues( token );
m_connectionId.fetch_add( 1, std::memory_order_release ); m_connectionId.fetch_add( 1, std::memory_order_release );
m_isConnected.store( true, std::memory_order_release );
#endif #endif
m_isConnected.store( true, std::memory_order_release );
HandshakeStatus handshake = HandshakeWelcome; HandshakeStatus handshake = HandshakeWelcome;
m_sock->Send( &handshake, sizeof( handshake ) ); m_sock->Send( &handshake, sizeof( handshake ) );
@ -1403,16 +1471,19 @@ void Profiler::Worker()
for( auto& item : m_deferredQueue ) for( auto& item : m_deferredQueue )
{ {
uint64_t ptr; uint64_t ptr;
uint16_t size;
const auto idx = MemRead<uint8_t>( &item.hdr.idx ); const auto idx = MemRead<uint8_t>( &item.hdr.idx );
switch( (QueueType)idx ) switch( (QueueType)idx )
{ {
case QueueType::MessageAppInfo: case QueueType::MessageAppInfo:
ptr = MemRead<uint64_t>( &item.message.text ); ptr = MemRead<uint64_t>( &item.messageFat.text );
SendString( ptr, (const char*)ptr, QueueType::CustomStringData ); size = MemRead<uint16_t>( &item.messageFat.size );
SendSingleString( (const char*)ptr, size );
break; break;
case QueueType::LockName: case QueueType::LockName:
ptr = MemRead<uint64_t>( &item.lockName.name ); ptr = MemRead<uint64_t>( &item.lockNameFat.name );
SendString( ptr, (const char*)ptr, QueueType::CustomStringData ); size = MemRead<uint16_t>( &item.lockNameFat.size );
SendSingleString( (const char*)ptr, size );
break; break;
default: default:
break; break;
@ -1469,8 +1540,8 @@ void Profiler::Worker()
} }
if( ShouldExit() ) break; if( ShouldExit() ) break;
#ifdef TRACY_ON_DEMAND
m_isConnected.store( false, std::memory_order_release ); m_isConnected.store( false, std::memory_order_release );
#ifdef TRACY_ON_DEMAND
m_bufferOffset = 0; m_bufferOffset = 0;
m_bufferStart = 0; m_bufferStart = 0;
#endif #endif
@ -1592,6 +1663,8 @@ void Profiler::Worker()
void Profiler::CompressWorker() void Profiler::CompressWorker()
{ {
ThreadExitHandler threadExitHandler;
SetThreadName( "Tracy DXT1" ); SetThreadName( "Tracy DXT1" );
while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) ); while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
rpmalloc_thread_initialize(); rpmalloc_thread_initialize();
@ -1631,12 +1704,12 @@ void Profiler::CompressWorker()
tracy_free( fi->image ); tracy_free( fi->image );
TracyLfqPrepare( QueueType::FrameImage ); TracyLfqPrepare( QueueType::FrameImage );
MemWrite( &item->frameImage.image, (uint64_t)etc1buf ); MemWrite( &item->frameImageFat.image, (uint64_t)etc1buf );
MemWrite( &item->frameImage.frame, fi->frame ); MemWrite( &item->frameImageFat.frame, fi->frame );
MemWrite( &item->frameImage.w, w ); MemWrite( &item->frameImageFat.w, w );
MemWrite( &item->frameImage.h, h ); MemWrite( &item->frameImageFat.h, h );
uint8_t flip = fi->flip; uint8_t flip = fi->flip;
MemWrite( &item->frameImage.flip, flip ); MemWrite( &item->frameImageFat.flip, flip );
TracyLfqCommit; TracyLfqCommit;
fi++; fi++;
@ -1664,7 +1737,7 @@ static void FreeAssociatedMemory( const QueueItem& item )
{ {
case QueueType::ZoneText: case QueueType::ZoneText:
case QueueType::ZoneName: case QueueType::ZoneName:
ptr = MemRead<uint64_t>( &item.zoneText.text ); ptr = MemRead<uint64_t>( &item.zoneTextFat.text );
tracy_free( (void*)ptr ); tracy_free( (void*)ptr );
break; break;
case QueueType::Message: case QueueType::Message:
@ -1674,7 +1747,7 @@ static void FreeAssociatedMemory( const QueueItem& item )
#ifndef TRACY_ON_DEMAND #ifndef TRACY_ON_DEMAND
case QueueType::MessageAppInfo: case QueueType::MessageAppInfo:
#endif #endif
ptr = MemRead<uint64_t>( &item.message.text ); ptr = MemRead<uint64_t>( &item.messageFat.text );
tracy_free( (void*)ptr ); tracy_free( (void*)ptr );
break; break;
case QueueType::ZoneBeginAllocSrcLoc: case QueueType::ZoneBeginAllocSrcLoc:
@ -1683,30 +1756,27 @@ static void FreeAssociatedMemory( const QueueItem& item )
tracy_free( (void*)ptr ); tracy_free( (void*)ptr );
break; break;
case QueueType::CallstackMemory: case QueueType::CallstackMemory:
ptr = MemRead<uint64_t>( &item.callstackMemory.ptr );
tracy_free( (void*)ptr );
break;
case QueueType::Callstack: case QueueType::Callstack:
ptr = MemRead<uint64_t>( &item.callstack.ptr ); ptr = MemRead<uint64_t>( &item.callstackFat.ptr );
tracy_free( (void*)ptr ); tracy_free( (void*)ptr );
break; break;
case QueueType::CallstackAlloc: case QueueType::CallstackAlloc:
ptr = MemRead<uint64_t>( &item.callstackAlloc.nativePtr ); ptr = MemRead<uint64_t>( &item.callstackAllocFat.nativePtr );
tracy_free( (void*)ptr ); tracy_free( (void*)ptr );
ptr = MemRead<uint64_t>( &item.callstackAlloc.ptr ); ptr = MemRead<uint64_t>( &item.callstackAllocFat.ptr );
tracy_free( (void*)ptr ); tracy_free( (void*)ptr );
break; break;
case QueueType::CallstackSample: case QueueType::CallstackSample:
ptr = MemRead<uint64_t>( &item.callstackSample.ptr ); ptr = MemRead<uint64_t>( &item.callstackSampleFat.ptr );
tracy_free( (void*)ptr ); tracy_free( (void*)ptr );
break; break;
case QueueType::FrameImage: case QueueType::FrameImage:
ptr = MemRead<uint64_t>( &item.frameImage.image ); ptr = MemRead<uint64_t>( &item.frameImageFat.image );
tracy_free( (void*)ptr ); tracy_free( (void*)ptr );
break; break;
#ifndef TRACY_ON_DEMAND #ifndef TRACY_ON_DEMAND
case QueueType::LockName: case QueueType::LockName:
ptr = MemRead<uint64_t>( &item.lockName.name ); ptr = MemRead<uint64_t>( &item.lockNameFat.name );
tracy_free( (void*)ptr ); tracy_free( (void*)ptr );
break; break;
#endif #endif
@ -1779,6 +1849,7 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
while( sz-- > 0 ) while( sz-- > 0 )
{ {
uint64_t ptr; uint64_t ptr;
uint16_t size;
auto idx = MemRead<uint8_t>( &item->hdr.idx ); auto idx = MemRead<uint8_t>( &item->hdr.idx );
if( idx < (int)QueueType::Terminate ) if( idx < (int)QueueType::Terminate )
{ {
@ -1786,21 +1857,29 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
{ {
case QueueType::ZoneText: case QueueType::ZoneText:
case QueueType::ZoneName: case QueueType::ZoneName:
ptr = MemRead<uint64_t>( &item->zoneText.text ); ptr = MemRead<uint64_t>( &item->zoneTextFat.text );
SendString( ptr, (const char*)ptr, QueueType::CustomStringData ); size = MemRead<uint16_t>( &item->zoneTextFat.size );
SendSingleString( (const char*)ptr, size );
tracy_free( (void*)ptr ); tracy_free( (void*)ptr );
break; break;
case QueueType::Message: case QueueType::Message:
case QueueType::MessageColor:
case QueueType::MessageCallstack: case QueueType::MessageCallstack:
ptr = MemRead<uint64_t>( &item->messageFat.text );
size = MemRead<uint16_t>( &item->messageFat.size );
SendSingleString( (const char*)ptr, size );
tracy_free( (void*)ptr );
break;
case QueueType::MessageColor:
case QueueType::MessageColorCallstack: case QueueType::MessageColorCallstack:
ptr = MemRead<uint64_t>( &item->message.text ); ptr = MemRead<uint64_t>( &item->messageColorFat.text );
SendString( ptr, (const char*)ptr, QueueType::CustomStringData ); size = MemRead<uint16_t>( &item->messageColorFat.size );
SendSingleString( (const char*)ptr, size );
tracy_free( (void*)ptr ); tracy_free( (void*)ptr );
break; break;
case QueueType::MessageAppInfo: case QueueType::MessageAppInfo:
ptr = MemRead<uint64_t>( &item->message.text ); ptr = MemRead<uint64_t>( &item->messageFat.text );
SendString( ptr, (const char*)ptr, QueueType::CustomStringData ); size = MemRead<uint16_t>( &item->messageFat.size );
SendSingleString( (const char*)ptr, size );
#ifndef TRACY_ON_DEMAND #ifndef TRACY_ON_DEMAND
tracy_free( (void*)ptr ); tracy_free( (void*)ptr );
#endif #endif
@ -1815,54 +1894,44 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
ptr = MemRead<uint64_t>( &item->zoneBegin.srcloc ); ptr = MemRead<uint64_t>( &item->zoneBegin.srcloc );
SendSourceLocationPayload( ptr ); SendSourceLocationPayload( ptr );
tracy_free( (void*)ptr ); tracy_free( (void*)ptr );
idx++;
MemWrite( &item->hdr.idx, idx );
break; break;
} }
case QueueType::Callstack: case QueueType::Callstack:
ptr = MemRead<uint64_t>( &item->callstack.ptr ); ptr = MemRead<uint64_t>( &item->callstackFat.ptr );
SendCallstackPayload( ptr ); SendCallstackPayload( ptr );
tracy_free( (void*)ptr ); tracy_free( (void*)ptr );
idx++;
MemWrite( &item->hdr.idx, idx );
break; break;
case QueueType::CallstackAlloc: case QueueType::CallstackAlloc:
ptr = MemRead<uint64_t>( &item->callstackAlloc.nativePtr ); ptr = MemRead<uint64_t>( &item->callstackAllocFat.nativePtr );
if( ptr != 0 ) if( ptr != 0 )
{ {
CutCallstack( (void*)ptr, "lua_pcall" ); CutCallstack( (void*)ptr, "lua_pcall" );
SendCallstackPayload( ptr ); SendCallstackPayload( ptr );
tracy_free( (void*)ptr ); tracy_free( (void*)ptr );
} }
ptr = MemRead<uint64_t>( &item->callstackAlloc.ptr ); ptr = MemRead<uint64_t>( &item->callstackAllocFat.ptr );
SendCallstackAlloc( ptr ); SendCallstackAlloc( ptr );
tracy_free( (void*)ptr ); tracy_free( (void*)ptr );
idx++;
MemWrite( &item->hdr.idx, idx );
break; break;
case QueueType::CallstackSample: case QueueType::CallstackSample:
{ {
ptr = MemRead<uint64_t>( &item->callstackSample.ptr ); ptr = MemRead<uint64_t>( &item->callstackSampleFat.ptr );
SendCallstackPayload64( ptr ); SendCallstackPayload64( ptr );
tracy_free( (void*)ptr ); tracy_free( (void*)ptr );
int64_t t = MemRead<int64_t>( &item->callstackSample.time ); int64_t t = MemRead<int64_t>( &item->callstackSampleFat.time );
int64_t dt = t - refCtx; int64_t dt = t - refCtx;
refCtx = t; refCtx = t;
MemWrite( &item->callstackSample.time, dt ); MemWrite( &item->callstackSampleFat.time, dt );
idx++;
MemWrite( &item->hdr.idx, idx );
break; break;
} }
case QueueType::FrameImage: case QueueType::FrameImage:
{ {
ptr = MemRead<uint64_t>( &item->frameImage.image ); ptr = MemRead<uint64_t>( &item->frameImageFat.image );
const auto w = MemRead<uint16_t>( &item->frameImage.w ); const auto w = MemRead<uint16_t>( &item->frameImageFat.w );
const auto h = MemRead<uint16_t>( &item->frameImage.h ); const auto h = MemRead<uint16_t>( &item->frameImageFat.h );
const auto csz = size_t( w * h / 2 ); const auto csz = size_t( w * h / 2 );
SendLongString( ptr, (const char*)ptr, csz, QueueType::FrameImageData ); SendLongString( ptr, (const char*)ptr, csz, QueueType::FrameImageData );
tracy_free( (void*)ptr ); tracy_free( (void*)ptr );
idx++;
MemWrite( &item->hdr.idx, idx );
break; break;
} }
case QueueType::ZoneBegin: case QueueType::ZoneBegin:
@ -1882,13 +1951,6 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
MemWrite( &item->zoneEnd.time, dt ); MemWrite( &item->zoneEnd.time, dt );
break; break;
} }
case QueueType::LockName:
ptr = MemRead<uint64_t>( &item->lockName.name );
SendString( ptr, (const char*)ptr, QueueType::CustomStringData );
#ifndef TRACY_ON_DEMAND
tracy_free( (void*)ptr );
#endif
break;
case QueueType::GpuZoneBegin: case QueueType::GpuZoneBegin:
case QueueType::GpuZoneBeginCallstack: case QueueType::GpuZoneBeginCallstack:
{ {
@ -2056,11 +2118,9 @@ Profiler::DequeueStatus Profiler::DequeueSerial()
switch( (QueueType)idx ) switch( (QueueType)idx )
{ {
case QueueType::CallstackMemory: case QueueType::CallstackMemory:
ptr = MemRead<uint64_t>( &item->callstackMemory.ptr ); ptr = MemRead<uint64_t>( &item->callstackFat.ptr );
SendCallstackPayload( ptr ); SendCallstackPayload( ptr );
tracy_free( (void*)ptr ); tracy_free( (void*)ptr );
idx++;
MemWrite( &item->hdr.idx, idx );
break; break;
case QueueType::LockWait: case QueueType::LockWait:
case QueueType::LockSharedWait: case QueueType::LockSharedWait:
@ -2089,8 +2149,20 @@ Profiler::DequeueStatus Profiler::DequeueSerial()
MemWrite( &item->lockRelease.time, dt ); MemWrite( &item->lockRelease.time, dt );
break; break;
} }
case QueueType::LockName:
{
ptr = MemRead<uint64_t>( &item->lockNameFat.name );
uint16_t size = MemRead<uint16_t>( &item->lockNameFat.size );
SendSingleString( (const char*)ptr, size );
#ifndef TRACY_ON_DEMAND
tracy_free( (void*)ptr );
#endif
break;
}
case QueueType::MemAlloc: case QueueType::MemAlloc:
case QueueType::MemAllocNamed:
case QueueType::MemAllocCallstack: case QueueType::MemAllocCallstack:
case QueueType::MemAllocCallstackNamed:
{ {
int64_t t = MemRead<int64_t>( &item->memAlloc.time ); int64_t t = MemRead<int64_t>( &item->memAlloc.time );
int64_t dt = t - refSerial; int64_t dt = t - refSerial;
@ -2099,7 +2171,9 @@ Profiler::DequeueStatus Profiler::DequeueSerial()
break; break;
} }
case QueueType::MemFree: case QueueType::MemFree:
case QueueType::MemFreeNamed:
case QueueType::MemFreeCallstack: case QueueType::MemFreeCallstack:
case QueueType::MemFreeCallstackNamed:
{ {
int64_t t = MemRead<int64_t>( &item->memFree.time ); int64_t t = MemRead<int64_t>( &item->memFree.time );
int64_t dt = t - refSerial; int64_t dt = t - refSerial;
@ -2166,11 +2240,10 @@ bool Profiler::SendData( const char* data, size_t len )
return m_sock->Send( m_lz4Buf, lz4sz + sizeof( lz4sz_t ) ) != -1; return m_sock->Send( m_lz4Buf, lz4sz + sizeof( lz4sz_t ) ) != -1;
} }
void Profiler::SendString( uint64_t str, const char* ptr, QueueType type ) void Profiler::SendString( uint64_t str, const char* ptr, size_t len, QueueType type )
{ {
assert( type == QueueType::StringData || assert( type == QueueType::StringData ||
type == QueueType::ThreadName || type == QueueType::ThreadName ||
type == QueueType::CustomStringData ||
type == QueueType::PlotName || type == QueueType::PlotName ||
type == QueueType::FrameName || type == QueueType::FrameName ||
type == QueueType::ExternalName || type == QueueType::ExternalName ||
@ -2180,7 +2253,6 @@ void Profiler::SendString( uint64_t str, const char* ptr, QueueType type )
MemWrite( &item.hdr.type, type ); MemWrite( &item.hdr.type, type );
MemWrite( &item.stringTransfer.ptr, str ); MemWrite( &item.stringTransfer.ptr, str );
auto len = strlen( ptr );
assert( len <= std::numeric_limits<uint16_t>::max() ); assert( len <= std::numeric_limits<uint16_t>::max() );
auto l16 = uint16_t( len ); auto l16 = uint16_t( len );
@ -2191,6 +2263,36 @@ void Profiler::SendString( uint64_t str, const char* ptr, QueueType type )
AppendDataUnsafe( ptr, l16 ); AppendDataUnsafe( ptr, l16 );
} }
void Profiler::SendSingleString( const char* ptr, size_t len )
{
QueueItem item;
MemWrite( &item.hdr.type, QueueType::SingleStringData );
assert( len <= std::numeric_limits<uint16_t>::max() );
auto l16 = uint16_t( len );
NeedDataSize( QueueDataSize[(int)QueueType::SingleStringData] + sizeof( l16 ) + l16 );
AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SingleStringData] );
AppendDataUnsafe( &l16, sizeof( l16 ) );
AppendDataUnsafe( ptr, l16 );
}
void Profiler::SendSecondString( const char* ptr, size_t len )
{
QueueItem item;
MemWrite( &item.hdr.type, QueueType::SecondStringData );
assert( len <= std::numeric_limits<uint16_t>::max() );
auto l16 = uint16_t( len );
NeedDataSize( QueueDataSize[(int)QueueType::SecondStringData] + sizeof( l16 ) + l16 );
AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SecondStringData] );
AppendDataUnsafe( &l16, sizeof( l16 ) );
AppendDataUnsafe( ptr, l16 );
}
void Profiler::SendLongString( uint64_t str, const char* ptr, size_t len, QueueType type ) void Profiler::SendLongString( uint64_t str, const char* ptr, size_t len, QueueType type )
{ {
assert( type == QueueType::FrameImageData || assert( type == QueueType::FrameImageData ||
@ -2234,16 +2336,17 @@ void Profiler::SendSourceLocationPayload( uint64_t _ptr )
MemWrite( &item.hdr.type, QueueType::SourceLocationPayload ); MemWrite( &item.hdr.type, QueueType::SourceLocationPayload );
MemWrite( &item.stringTransfer.ptr, _ptr ); MemWrite( &item.stringTransfer.ptr, _ptr );
const auto len = *((uint32_t*)ptr); uint16_t len;
assert( len <= std::numeric_limits<uint16_t>::max() ); memcpy( &len, ptr, sizeof( len ) );
assert( len > 4 ); assert( len > 2 );
const auto l16 = uint16_t( len - 4 ); len -= 2;
ptr += 2;
NeedDataSize( QueueDataSize[(int)QueueType::SourceLocationPayload] + sizeof( l16 ) + l16 ); NeedDataSize( QueueDataSize[(int)QueueType::SourceLocationPayload] + sizeof( len ) + len );
AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SourceLocationPayload] ); AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SourceLocationPayload] );
AppendDataUnsafe( &l16, sizeof( l16 ) ); AppendDataUnsafe( &len, sizeof( len ) );
AppendDataUnsafe( ptr + 4, l16 ); AppendDataUnsafe( ptr, len );
} }
void Profiler::SendCallstackPayload( uint64_t _ptr ) void Profiler::SendCallstackPayload( uint64_t _ptr )
@ -2304,15 +2407,15 @@ void Profiler::SendCallstackAlloc( uint64_t _ptr )
MemWrite( &item.hdr.type, QueueType::CallstackAllocPayload ); MemWrite( &item.hdr.type, QueueType::CallstackAllocPayload );
MemWrite( &item.stringTransfer.ptr, _ptr ); MemWrite( &item.stringTransfer.ptr, _ptr );
const auto len = *((uint32_t*)ptr); uint16_t len;
assert( len <= std::numeric_limits<uint16_t>::max() ); memcpy( &len, ptr, 2 );
const auto l16 = uint16_t( len ); ptr += 2;
NeedDataSize( QueueDataSize[(int)QueueType::CallstackAllocPayload] + sizeof( l16 ) + l16 ); NeedDataSize( QueueDataSize[(int)QueueType::CallstackAllocPayload] + sizeof( len ) + len );
AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackAllocPayload] ); AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackAllocPayload] );
AppendDataUnsafe( &l16, sizeof( l16 ) ); AppendDataUnsafe( &len, sizeof( len ) );
AppendDataUnsafe( ptr + 4, l16 ); AppendDataUnsafe( ptr, len );
} }
void Profiler::SendCallstackFrame( uint64_t ptr ) void Profiler::SendCallstackFrame( uint64_t ptr )
@ -2321,13 +2424,12 @@ void Profiler::SendCallstackFrame( uint64_t ptr )
const auto frameData = DecodeCallstackPtr( ptr ); const auto frameData = DecodeCallstackPtr( ptr );
{ {
SendString( uint64_t( frameData.imageName ), frameData.imageName, QueueType::CustomStringData ); SendSingleString( frameData.imageName );
QueueItem item; QueueItem item;
MemWrite( &item.hdr.type, QueueType::CallstackFrameSize ); MemWrite( &item.hdr.type, QueueType::CallstackFrameSize );
MemWrite( &item.callstackFrameSize.ptr, ptr ); MemWrite( &item.callstackFrameSize.ptr, ptr );
MemWrite( &item.callstackFrameSize.size, frameData.size ); MemWrite( &item.callstackFrameSize.size, frameData.size );
MemWrite( &item.callstackFrameSize.imageName, (uint64_t)frameData.imageName );
AppendData( &item, QueueDataSize[(int)QueueType::CallstackFrameSize] ); AppendData( &item, QueueDataSize[(int)QueueType::CallstackFrameSize] );
} }
@ -2336,23 +2438,14 @@ void Profiler::SendCallstackFrame( uint64_t ptr )
{ {
const auto& frame = frameData.data[i]; const auto& frame = frameData.data[i];
SendString( uint64_t( frame.name ), frame.name, QueueType::CustomStringData ); SendSingleString( frame.name );
SendString( uint64_t( frame.file ), frame.file, QueueType::CustomStringData ); SendSecondString( frame.file );
QueueItem item; QueueItem item;
MemWrite( &item.hdr.type, QueueType::CallstackFrame ); MemWrite( &item.hdr.type, QueueType::CallstackFrame );
MemWrite( &item.callstackFrame.name, (uint64_t)frame.name );
MemWrite( &item.callstackFrame.file, (uint64_t)frame.file );
MemWrite( &item.callstackFrame.line, frame.line ); MemWrite( &item.callstackFrame.line, frame.line );
MemWrite( &item.callstackFrame.symAddr, frame.symAddr ); MemWrite( &item.callstackFrame.symAddr, frame.symAddr );
if( frame.symLen > ( 1 << 24 ) ) MemWrite( &item.callstackFrame.symLen, frame.symLen );
{
memset( item.callstackFrame.symLen, 0, 3 );
}
else
{
memcpy( item.callstackFrame.symLen, &frame.symLen, 3 );
}
AppendData( &item, QueueDataSize[(int)QueueType::CallstackFrame] ); AppendData( &item, QueueDataSize[(int)QueueType::CallstackFrame] );
@ -2383,7 +2476,7 @@ bool Profiler::HandleServerQuery()
case ServerQueryThreadString: case ServerQueryThreadString:
if( ptr == m_mainThread ) if( ptr == m_mainThread )
{ {
SendString( ptr, "Main thread", QueueType::ThreadName ); SendString( ptr, "Main thread", 11, QueueType::ThreadName );
} }
else else
{ {
@ -2418,9 +2511,11 @@ bool Profiler::HandleServerQuery()
case ServerQuerySymbol: case ServerQuerySymbol:
HandleSymbolQuery( ptr ); HandleSymbolQuery( ptr );
break; break;
#ifndef TRACY_NO_CODE_TRANSFER
case ServerQuerySymbolCode: case ServerQuerySymbolCode:
HandleSymbolCodeQuery( ptr, extra ); HandleSymbolCodeQuery( ptr, extra );
break; break;
#endif
case ServerQueryCodeLocation: case ServerQueryCodeLocation:
SendCodeLocation( ptr ); SendCodeLocation( ptr );
break; break;
@ -2517,9 +2612,6 @@ void Profiler::HandleDisconnect()
void Profiler::CalibrateTimer() void Profiler::CalibrateTimer()
{ {
#ifdef TRACY_HW_TIMER #ifdef TRACY_HW_TIMER
# if !defined TARGET_OS_IOS && __ARM_ARCH >= 6
m_timerMul = 1.;
# else
std::atomic_signal_fence( std::memory_order_acq_rel ); std::atomic_signal_fence( std::memory_order_acq_rel );
const auto t0 = std::chrono::high_resolution_clock::now(); const auto t0 = std::chrono::high_resolution_clock::now();
const auto r0 = GetTime(); const auto r0 = GetTime();
@ -2534,7 +2626,6 @@ void Profiler::CalibrateTimer()
const auto dr = r1 - r0; const auto dr = r1 - r0;
m_timerMul = double( dt ) / double( dr ); m_timerMul = double( dt ) / double( dr );
# endif
#else #else
m_timerMul = 1.; m_timerMul = 1.;
#endif #endif
@ -2542,7 +2633,7 @@ void Profiler::CalibrateTimer()
void Profiler::CalibrateDelay() void Profiler::CalibrateDelay()
{ {
enum { Iterations = 50000 }; constexpr int Iterations = 50000;
auto mindiff = std::numeric_limits<int64_t>::max(); auto mindiff = std::numeric_limits<int64_t>::max();
for( int i=0; i<Iterations * 10; i++ ) for( int i=0; i<Iterations * 10; i++ )
@ -2557,7 +2648,7 @@ void Profiler::CalibrateDelay()
#ifdef TRACY_DELAYED_INIT #ifdef TRACY_DELAYED_INIT
m_delay = m_resolution; m_delay = m_resolution;
#else #else
enum { Events = Iterations * 2 }; // start + end constexpr int Events = Iterations * 2; // start + end
static_assert( Events < QueuePrealloc, "Delay calibration loop will allocate memory in queue" ); static_assert( Events < QueuePrealloc, "Delay calibration loop will allocate memory in queue" );
static const tracy::SourceLocationData __tracy_source_location { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; static const tracy::SourceLocationData __tracy_source_location { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 };
@ -2692,12 +2783,16 @@ void Profiler::ReportTopology()
sprintf( path, "%s%i/topology/physical_package_id", basePath, i ); sprintf( path, "%s%i/topology/physical_package_id", basePath, i );
char buf[1024]; char buf[1024];
FILE* f = fopen( path, "rb" ); FILE* f = fopen( path, "rb" );
if( !f )
{
tracy_free( cpuData );
return;
}
auto read = fread( buf, 1, 1024, f ); auto read = fread( buf, 1, 1024, f );
buf[read] = '\0'; buf[read] = '\0';
fclose( f ); fclose( f );
cpuData[i].package = uint32_t( atoi( buf ) ); cpuData[i].package = uint32_t( atoi( buf ) );
cpuData[i].thread = i; cpuData[i].thread = i;
sprintf( path, "%s%i/topology/core_id", basePath, i ); sprintf( path, "%s%i/topology/core_id", basePath, i );
f = fopen( path, "rb" ); f = fopen( path, "rb" );
read = fread( buf, 1, 1024, f ); read = fread( buf, 1, 1024, f );
@ -2730,11 +2825,10 @@ void Profiler::ReportTopology()
void Profiler::SendCallstack( int depth, const char* skipBefore ) void Profiler::SendCallstack( int depth, const char* skipBefore )
{ {
#ifdef TRACY_HAS_CALLSTACK #ifdef TRACY_HAS_CALLSTACK
TracyLfqPrepare( QueueType::Callstack );
auto ptr = Callstack( depth ); auto ptr = Callstack( depth );
CutCallstack( ptr, skipBefore ); CutCallstack( ptr, skipBefore );
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
TracyLfqPrepare( QueueType::Callstack );
MemWrite( &item->callstack.ptr, (uint64_t)ptr );
TracyLfqCommit; TracyLfqCommit;
#endif #endif
} }
@ -2800,11 +2894,10 @@ void Profiler::HandleSymbolQuery( uint64_t symbol )
#ifdef TRACY_HAS_CALLSTACK #ifdef TRACY_HAS_CALLSTACK
const auto sym = DecodeSymbolAddress( symbol ); const auto sym = DecodeSymbolAddress( symbol );
SendString( uint64_t( sym.file ), sym.file, QueueType::CustomStringData ); SendSingleString( sym.file );
QueueItem item; QueueItem item;
MemWrite( &item.hdr.type, QueueType::SymbolInformation ); MemWrite( &item.hdr.type, QueueType::SymbolInformation );
MemWrite( &item.symbolInformation.file, uint64_t( sym.file ) );
MemWrite( &item.symbolInformation.line, sym.line ); MemWrite( &item.symbolInformation.line, sym.line );
MemWrite( &item.symbolInformation.symAddr, symbol ); MemWrite( &item.symbolInformation.symAddr, symbol );
@ -2824,12 +2917,11 @@ void Profiler::SendCodeLocation( uint64_t ptr )
#ifdef TRACY_HAS_CALLSTACK #ifdef TRACY_HAS_CALLSTACK
const auto sym = DecodeCodeAddress( ptr ); const auto sym = DecodeCodeAddress( ptr );
SendString( uint64_t( sym.file ), sym.file, QueueType::CustomStringData ); SendSingleString( sym.file );
QueueItem item; QueueItem item;
MemWrite( &item.hdr.type, QueueType::CodeInformation ); MemWrite( &item.hdr.type, QueueType::CodeInformation );
MemWrite( &item.codeInformation.ptr, ptr ); MemWrite( &item.codeInformation.ptr, ptr );
MemWrite( &item.codeInformation.file, uint64_t( sym.file ) );
MemWrite( &item.codeInformation.line, sym.line ); MemWrite( &item.codeInformation.line, sym.line );
AppendData( &item, QueueDataSize[(int)QueueType::CodeInformation] ); AppendData( &item, QueueDataSize[(int)QueueType::CodeInformation] );
@ -2900,14 +2992,13 @@ TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___trac
TracyLfqCommitC; TracyLfqCommitC;
} }
#endif #endif
tracy::GetProfiler().SendCallstack( depth );
{ {
TracyLfqPrepareC( tracy::QueueType::ZoneBeginCallstack ); TracyLfqPrepareC( tracy::QueueType::ZoneBeginCallstack );
tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
TracyLfqCommitC; TracyLfqCommitC;
} }
tracy::GetProfiler().SendCallstack( depth );
return ctx; return ctx;
} }
@ -2966,14 +3057,13 @@ TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srclo
TracyLfqCommitC; TracyLfqCommitC;
} }
#endif #endif
tracy::GetProfiler().SendCallstack( depth );
{ {
TracyLfqPrepareC( tracy::QueueType::ZoneBeginAllocSrcLocCallstack ); TracyLfqPrepareC( tracy::QueueType::ZoneBeginAllocSrcLocCallstack );
tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() ); tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
tracy::MemWrite( &item->zoneBegin.srcloc, srcloc ); tracy::MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyLfqCommitC; TracyLfqCommitC;
} }
tracy::GetProfiler().SendCallstack( depth );
return ctx; return ctx;
} }
@ -2996,10 +3086,10 @@ TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx )
TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size ) TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size )
{ {
assert( size < std::numeric_limits<uint16_t>::max() );
if( !ctx.active ) return; if( !ctx.active ) return;
auto ptr = (char*)tracy::tracy_malloc( size+1 ); auto ptr = (char*)tracy::tracy_malloc( size );
memcpy( ptr, txt, size ); memcpy( ptr, txt, size );
ptr[size] = '\0';
#ifndef TRACY_NO_VERIFY #ifndef TRACY_NO_VERIFY
{ {
TracyLfqPrepareC( tracy::QueueType::ZoneValidation ); TracyLfqPrepareC( tracy::QueueType::ZoneValidation );
@ -3009,17 +3099,18 @@ TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size
#endif #endif
{ {
TracyLfqPrepareC( tracy::QueueType::ZoneText ); TracyLfqPrepareC( tracy::QueueType::ZoneText );
tracy::MemWrite( &item->zoneText.text, (uint64_t)ptr ); tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size );
TracyLfqCommitC; TracyLfqCommitC;
} }
} }
TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size ) TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size )
{ {
assert( size < std::numeric_limits<uint16_t>::max() );
if( !ctx.active ) return; if( !ctx.active ) return;
auto ptr = (char*)tracy::tracy_malloc( size+1 ); auto ptr = (char*)tracy::tracy_malloc( size );
memcpy( ptr, txt, size ); memcpy( ptr, txt, size );
ptr[size] = '\0';
#ifndef TRACY_NO_VERIFY #ifndef TRACY_NO_VERIFY
{ {
TracyLfqPrepareC( tracy::QueueType::ZoneValidation ); TracyLfqPrepareC( tracy::QueueType::ZoneValidation );
@ -3029,7 +3120,8 @@ TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size
#endif #endif
{ {
TracyLfqPrepareC( tracy::QueueType::ZoneName ); TracyLfqPrepareC( tracy::QueueType::ZoneName );
tracy::MemWrite( &item->zoneText.text, (uint64_t)ptr ); tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size );
TracyLfqCommitC; TracyLfqCommitC;
} }
} }
@ -3051,10 +3143,10 @@ TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value )
} }
} }
TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size ) { tracy::Profiler::MemAlloc( ptr, size ); } TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure ) { tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); }
TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth ) { tracy::Profiler::MemAllocCallstack( ptr, size, depth ); } TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure ) { tracy::Profiler::MemAllocCallstack( ptr, size, depth, secure != 0 ); }
TRACY_API void ___tracy_emit_memory_free( const void* ptr ) { tracy::Profiler::MemFree( ptr ); } TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure ) { tracy::Profiler::MemFree( ptr, secure != 0 ); }
TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth ) { tracy::Profiler::MemFreeCallstack( ptr, depth ); } TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure ) { tracy::Profiler::MemFreeCallstack( ptr, depth, secure != 0 ); }
TRACY_API void ___tracy_emit_frame_mark( const char* name ) { tracy::Profiler::SendFrameMark( name ); } TRACY_API void ___tracy_emit_frame_mark( const char* name ) { tracy::Profiler::SendFrameMark( name ); }
TRACY_API void ___tracy_emit_frame_mark_start( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart ); } TRACY_API void ___tracy_emit_frame_mark_start( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart ); }
TRACY_API void ___tracy_emit_frame_mark_end( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd ); } TRACY_API void ___tracy_emit_frame_mark_end( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd ); }
@ -3065,8 +3157,27 @@ TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack ) { tracy:
TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, size, color, callstack ); } TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, size, color, callstack ); }
TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, color, callstack ); } TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, color, callstack ); }
TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ) { tracy::Profiler::MessageAppInfo( txt, size ); } TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ) { tracy::Profiler::MessageAppInfo( txt, size ); }
TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, const char* function ) { return tracy::Profiler::AllocSourceLocation( line, source, function ); }
TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz ) { return tracy::Profiler::AllocSourceLocation( line, source, function, name, nameSz ); } TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz ) {
return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz );
}
TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz ) {
return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
}
// Per-thread initialization hook for users of the C API.
//
// thread_local objects are not initialized when a thread is created (at least
// on GNU/Linux); they are initialized on their first ODR-use. The allocator is
// therefore not set up automatically for every new thread. C API users should
// call this function each time they create a thread; any per-thread
// initialization can be placed here.
TRACY_API void ___tracy_init_thread(void)
{
#ifndef TRACY_DELAYED_INIT
    (void)tracy::s_rpmalloc_thread_init;
#else
    (void)tracy::GetProfilerThreadData();
#endif
}
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -5,6 +5,7 @@
#include <atomic> #include <atomic>
#include <stdint.h> #include <stdint.h>
#include <string.h> #include <string.h>
#include <time.h>
#include "tracy_concurrentqueue.h" #include "tracy_concurrentqueue.h"
#include "TracyCallstack.hpp" #include "TracyCallstack.hpp"
@ -24,11 +25,11 @@
# include <mach/mach_time.h> # include <mach/mach_time.h>
#endif #endif
#if defined _WIN32 || defined __CYGWIN__ || ( ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) && !defined __ANDROID__ ) || __ARM_ARCH >= 6 #if defined _WIN32 || defined __CYGWIN__ || ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) || ( defined TARGET_OS_IOS && TARGET_OS_IOS == 1 )
# define TRACY_HW_TIMER # define TRACY_HW_TIMER
#endif #endif
#if !defined TRACY_HW_TIMER || ( defined __ARM_ARCH && __ARM_ARCH >= 6 && !defined CLOCK_MONOTONIC_RAW ) #if !defined TRACY_HW_TIMER
#include <chrono> #include <chrono>
#endif #endif
@ -41,6 +42,10 @@
namespace tracy namespace tracy
{ {
#if defined(TRACY_DELAYED_INIT) && defined(TRACY_MANUAL_LIFETIME)
void StartupProfiler();
void ShutdownProfiler();
#endif
class GpuCtx; class GpuCtx;
class Profiler; class Profiler;
@ -59,6 +64,8 @@ TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter();
TRACY_API GpuCtxWrapper& GetGpuCtx(); TRACY_API GpuCtxWrapper& GetGpuCtx();
TRACY_API uint64_t GetThreadHandle(); TRACY_API uint64_t GetThreadHandle();
TRACY_API void InitRPMallocThread(); TRACY_API void InitRPMallocThread();
TRACY_API bool ProfilerAvailable();
TRACY_API int64_t GetFrequencyQpc();
struct SourceLocationData struct SourceLocationData
{ {
@ -106,7 +113,7 @@ class Profiler
struct FrameImageQueueItem struct FrameImageQueueItem
{ {
void* image; void* image;
uint64_t frame; uint32_t frame;
uint16_t w; uint16_t w;
uint16_t h; uint16_t h;
uint8_t offset; uint8_t offset;
@ -117,19 +124,13 @@ public:
Profiler(); Profiler();
~Profiler(); ~Profiler();
void SpawnWorkerThreads();
static tracy_force_inline int64_t GetTime() static tracy_force_inline int64_t GetTime()
{ {
#ifdef TRACY_HW_TIMER #ifdef TRACY_HW_TIMER
# if defined TARGET_OS_IOS && TARGET_OS_IOS == 1 # if defined TARGET_OS_IOS && TARGET_OS_IOS == 1
return mach_absolute_time(); return mach_absolute_time();
# elif defined __ARM_ARCH && __ARM_ARCH >= 6
# ifdef CLOCK_MONOTONIC_RAW
struct timespec ts;
clock_gettime( CLOCK_MONOTONIC_RAW, &ts );
return int64_t( ts.tv_sec ) * 1000000000ll + int64_t( ts.tv_nsec );
# else
return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
# endif
# elif defined _WIN32 || defined __CYGWIN__ # elif defined _WIN32 || defined __CYGWIN__
# ifdef TRACY_TIMER_QPC # ifdef TRACY_TIMER_QPC
return GetTimeQpc(); return GetTimeQpc();
@ -144,9 +145,17 @@ public:
uint64_t rax, rdx; uint64_t rax, rdx;
asm volatile ( "rdtsc" : "=a" (rax), "=d" (rdx) ); asm volatile ( "rdtsc" : "=a" (rax), "=d" (rdx) );
return ( rdx << 32 ) + rax; return ( rdx << 32 ) + rax;
# else
# error "TRACY_HW_TIMER detection logic needs fixing"
# endif # endif
#else #else
# if defined __linux__ && defined CLOCK_MONOTONIC_RAW
struct timespec ts;
clock_gettime( CLOCK_MONOTONIC_RAW, &ts );
return int64_t( ts.tv_sec ) * 1000000000ll + int64_t( ts.tv_nsec );
# else
return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count(); return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
# endif
#endif #endif
} }
@ -197,6 +206,7 @@ public:
static tracy_force_inline void SendFrameImage( const void* image, uint16_t w, uint16_t h, uint8_t offset, bool flip ) static tracy_force_inline void SendFrameImage( const void* image, uint16_t w, uint16_t h, uint8_t offset, bool flip )
{ {
auto& profiler = GetProfiler(); auto& profiler = GetProfiler();
assert( profiler.m_frameCount.load( std::memory_order_relaxed ) < std::numeric_limits<uint32_t>::max() );
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
if( !profiler.IsConnected() ) return; if( !profiler.IsConnected() ) return;
#endif #endif
@ -207,7 +217,7 @@ public:
profiler.m_fiLock.lock(); profiler.m_fiLock.lock();
auto fi = profiler.m_fiQueue.prepare_next(); auto fi = profiler.m_fiQueue.prepare_next();
fi->image = ptr; fi->image = ptr;
fi->frame = profiler.m_frameCount.load( std::memory_order_relaxed ) - offset; fi->frame = uint32_t( profiler.m_frameCount.load( std::memory_order_relaxed ) - offset );
fi->w = w; fi->w = w;
fi->h = h; fi->h = h;
fi->flip = flip; fi->flip = flip;
@ -269,18 +279,19 @@ public:
static tracy_force_inline void Message( const char* txt, size_t size, int callstack ) static tracy_force_inline void Message( const char* txt, size_t size, int callstack )
{ {
assert( size < std::numeric_limits<uint16_t>::max() );
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return; if( !GetProfiler().IsConnected() ) return;
#endif #endif
TracyLfqPrepare( callstack == 0 ? QueueType::Message : QueueType::MessageCallstack );
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
ptr[size] = '\0';
MemWrite( &item->message.time, GetTime() );
MemWrite( &item->message.text, (uint64_t)ptr );
TracyLfqCommit;
if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack ); if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack );
TracyLfqPrepare( callstack == 0 ? QueueType::Message : QueueType::MessageCallstack );
auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, txt, size );
MemWrite( &item->messageFat.time, GetTime() );
MemWrite( &item->messageFat.text, (uint64_t)ptr );
MemWrite( &item->messageFat.size, (uint16_t)size );
TracyLfqCommit;
} }
static tracy_force_inline void Message( const char* txt, int callstack ) static tracy_force_inline void Message( const char* txt, int callstack )
@ -288,31 +299,32 @@ public:
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return; if( !GetProfiler().IsConnected() ) return;
#endif #endif
TracyLfqPrepare( callstack == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack );
MemWrite( &item->message.time, GetTime() );
MemWrite( &item->message.text, (uint64_t)txt );
TracyLfqCommit;
if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack ); if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack );
TracyLfqPrepare( callstack == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack );
MemWrite( &item->messageLiteral.time, GetTime() );
MemWrite( &item->messageLiteral.text, (uint64_t)txt );
TracyLfqCommit;
} }
static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int callstack ) static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int callstack )
{ {
assert( size < std::numeric_limits<uint16_t>::max() );
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return; if( !GetProfiler().IsConnected() ) return;
#endif #endif
TracyLfqPrepare( callstack == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack );
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
ptr[size] = '\0';
MemWrite( &item->messageColor.time, GetTime() );
MemWrite( &item->messageColor.text, (uint64_t)ptr );
MemWrite( &item->messageColor.r, uint8_t( ( color ) & 0xFF ) );
MemWrite( &item->messageColor.g, uint8_t( ( color >> 8 ) & 0xFF ) );
MemWrite( &item->messageColor.b, uint8_t( ( color >> 16 ) & 0xFF ) );
TracyLfqCommit;
if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack ); if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack );
TracyLfqPrepare( callstack == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack );
auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, txt, size );
MemWrite( &item->messageColorFat.time, GetTime() );
MemWrite( &item->messageColorFat.text, (uint64_t)ptr );
MemWrite( &item->messageColorFat.r, uint8_t( ( color ) & 0xFF ) );
MemWrite( &item->messageColorFat.g, uint8_t( ( color >> 8 ) & 0xFF ) );
MemWrite( &item->messageColorFat.b, uint8_t( ( color >> 16 ) & 0xFF ) );
MemWrite( &item->messageColorFat.size, (uint16_t)size );
TracyLfqCommit;
} }
static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int callstack ) static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int callstack )
@ -320,26 +332,27 @@ public:
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return; if( !GetProfiler().IsConnected() ) return;
#endif #endif
TracyLfqPrepare( callstack == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack );
MemWrite( &item->messageColor.time, GetTime() );
MemWrite( &item->messageColor.text, (uint64_t)txt );
MemWrite( &item->messageColor.r, uint8_t( ( color ) & 0xFF ) );
MemWrite( &item->messageColor.g, uint8_t( ( color >> 8 ) & 0xFF ) );
MemWrite( &item->messageColor.b, uint8_t( ( color >> 16 ) & 0xFF ) );
TracyLfqCommit;
if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack ); if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack );
TracyLfqPrepare( callstack == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack );
MemWrite( &item->messageColorLiteral.time, GetTime() );
MemWrite( &item->messageColorLiteral.text, (uint64_t)txt );
MemWrite( &item->messageColorLiteral.r, uint8_t( ( color ) & 0xFF ) );
MemWrite( &item->messageColorLiteral.g, uint8_t( ( color >> 8 ) & 0xFF ) );
MemWrite( &item->messageColorLiteral.b, uint8_t( ( color >> 16 ) & 0xFF ) );
TracyLfqCommit;
} }
static tracy_force_inline void MessageAppInfo( const char* txt, size_t size ) static tracy_force_inline void MessageAppInfo( const char* txt, size_t size )
{ {
assert( size < std::numeric_limits<uint16_t>::max() );
InitRPMallocThread(); InitRPMallocThread();
auto ptr = (char*)tracy_malloc( size+1 ); auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, txt, size ); memcpy( ptr, txt, size );
ptr[size] = '\0';
TracyLfqPrepare( QueueType::MessageAppInfo ); TracyLfqPrepare( QueueType::MessageAppInfo );
MemWrite( &item->message.time, GetTime() ); MemWrite( &item->messageFat.time, GetTime() );
MemWrite( &item->message.text, (uint64_t)ptr ); MemWrite( &item->messageFat.text, (uint64_t)ptr );
MemWrite( &item->messageFat.size, (uint16_t)size );
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item ); GetProfiler().DeferItem( *item );
@ -348,8 +361,9 @@ public:
TracyLfqCommit; TracyLfqCommit;
} }
static tracy_force_inline void MemAlloc( const void* ptr, size_t size ) static tracy_force_inline void MemAlloc( const void* ptr, size_t size, bool secure )
{ {
if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return; if( !GetProfiler().IsConnected() ) return;
#endif #endif
@ -360,8 +374,9 @@ public:
GetProfiler().m_serialLock.unlock(); GetProfiler().m_serialLock.unlock();
} }
static tracy_force_inline void MemFree( const void* ptr ) static tracy_force_inline void MemFree( const void* ptr, bool secure )
{ {
if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return; if( !GetProfiler().IsConnected() ) return;
#endif #endif
@ -372,8 +387,9 @@ public:
GetProfiler().m_serialLock.unlock(); GetProfiler().m_serialLock.unlock();
} }
static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth ) static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth, bool secure )
{ {
if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_HAS_CALLSTACK #ifdef TRACY_HAS_CALLSTACK
auto& profiler = GetProfiler(); auto& profiler = GetProfiler();
# ifdef TRACY_ON_DEMAND # ifdef TRACY_ON_DEMAND
@ -385,16 +401,17 @@ public:
auto callstack = Callstack( depth ); auto callstack = Callstack( depth );
profiler.m_serialLock.lock(); profiler.m_serialLock.lock();
SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size );
SendCallstackMemory( callstack ); SendCallstackMemory( callstack );
SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size );
profiler.m_serialLock.unlock(); profiler.m_serialLock.unlock();
#else #else
MemAlloc( ptr, size ); MemAlloc( ptr, size, secure );
#endif #endif
} }
static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth ) static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth, bool secure )
{ {
if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_HAS_CALLSTACK #ifdef TRACY_HAS_CALLSTACK
auto& profiler = GetProfiler(); auto& profiler = GetProfiler();
# ifdef TRACY_ON_DEMAND # ifdef TRACY_ON_DEMAND
@ -406,11 +423,85 @@ public:
auto callstack = Callstack( depth ); auto callstack = Callstack( depth );
profiler.m_serialLock.lock(); profiler.m_serialLock.lock();
SendMemFree( QueueType::MemFreeCallstack, thread, ptr );
SendCallstackMemory( callstack ); SendCallstackMemory( callstack );
SendMemFree( QueueType::MemFreeCallstack, thread, ptr );
profiler.m_serialLock.unlock(); profiler.m_serialLock.unlock();
#else #else
MemFree( ptr ); MemFree( ptr, secure );
#endif
}
// Reports an allocation of `size` bytes at `ptr`, tagged with `name`, on the serial queue.
// When `secure` is set and ProfilerAvailable() is false, the event is dropped without
// touching the profiler. In TRACY_ON_DEMAND builds it is also dropped while no
// connection is established.
// `name` is queued by pointer value only (see SendMemName), not copied here.
static tracy_force_inline void MemAllocNamed( const void* ptr, size_t size, bool secure, const char* name )
{
    if( secure )
    {
        if( !ProfilerAvailable() ) return;
    }
#ifdef TRACY_ON_DEMAND
    if( !GetProfiler().IsConnected() ) return;
#endif
    const auto tid = GetThreadHandle();
    // The name payload and the alloc event are emitted under one lock so they stay adjacent.
    auto& serialLock = GetProfiler().m_serialLock;
    serialLock.lock();
    SendMemName( name );
    SendMemAlloc( QueueType::MemAllocNamed, tid, ptr, size );
    serialLock.unlock();
}
// Reports that the memory at `ptr` was freed, tagged with `name`, on the serial queue.
// When `secure` is set and ProfilerAvailable() is false, the event is dropped without
// touching the profiler. In TRACY_ON_DEMAND builds it is also dropped while no
// connection is established.
// `name` is queued by pointer value only (see SendMemName), not copied here.
static tracy_force_inline void MemFreeNamed( const void* ptr, bool secure, const char* name )
{
    if( secure )
    {
        if( !ProfilerAvailable() ) return;
    }
#ifdef TRACY_ON_DEMAND
    if( !GetProfiler().IsConnected() ) return;
#endif
    const auto tid = GetThreadHandle();
    // The name payload and the free event are emitted under one lock so they stay adjacent.
    auto& serialLock = GetProfiler().m_serialLock;
    serialLock.lock();
    SendMemName( name );
    SendMemFree( QueueType::MemFreeNamed, tid, ptr );
    serialLock.unlock();
}
// Reports a named allocation together with a call stack captured `depth` frames deep.
//   ptr    - address of the allocation
//   size   - size of the allocation in bytes
//   depth  - number of call stack frames to capture
//   secure - when set, the event is dropped if ProfilerAvailable() is false
//   name   - pool name; queued by pointer value only (see SendMemName)
// In TRACY_ON_DEMAND builds the event is dropped while no connection is established.
// Without TRACY_HAS_CALLSTACK the call stack cannot be captured, so the event degrades
// to a plain *named* allocation (the name must not be lost in the fallback).
static tracy_force_inline void MemAllocCallstackNamed( const void* ptr, size_t size, int depth, bool secure, const char* name )
{
    if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_HAS_CALLSTACK
    auto& profiler = GetProfiler();
#  ifdef TRACY_ON_DEMAND
    if( !profiler.IsConnected() ) return;
#  endif
    const auto thread = GetThreadHandle();

    // The call stack is captured before taking the lock to keep the critical section short.
    InitRPMallocThread();
    auto callstack = Callstack( depth );

    // Call stack, name payload and alloc event are queued under one lock, in this order.
    profiler.m_serialLock.lock();
    SendCallstackMemory( callstack );
    SendMemName( name );
    SendMemAlloc( QueueType::MemAllocCallstackNamed, thread, ptr, size );
    profiler.m_serialLock.unlock();
#else
    (void)depth;    // unused when call stacks are not available
    MemAllocNamed( ptr, size, secure, name );
#endif
}
// Reports a named free together with a call stack captured `depth` frames deep.
//   ptr    - address being freed
//   depth  - number of call stack frames to capture
//   secure - when set, the event is dropped if ProfilerAvailable() is false
//   name   - pool name; queued by pointer value only (see SendMemName)
// In TRACY_ON_DEMAND builds the event is dropped while no connection is established.
// Without TRACY_HAS_CALLSTACK the call stack cannot be captured, so the event degrades
// to a plain *named* free (the name must not be lost in the fallback).
static tracy_force_inline void MemFreeCallstackNamed( const void* ptr, int depth, bool secure, const char* name )
{
    if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_HAS_CALLSTACK
    auto& profiler = GetProfiler();
#  ifdef TRACY_ON_DEMAND
    if( !profiler.IsConnected() ) return;
#  endif
    const auto thread = GetThreadHandle();

    // The call stack is captured before taking the lock to keep the critical section short.
    InitRPMallocThread();
    auto callstack = Callstack( depth );

    // Call stack, name payload and free event are queued under one lock, in this order.
    profiler.m_serialLock.lock();
    SendCallstackMemory( callstack );
    SendMemName( name );
    SendMemFree( QueueType::MemFreeCallstackNamed, thread, ptr );
    profiler.m_serialLock.unlock();
#else
    (void)depth;    // unused when call stacks are not available
    MemFreeNamed( ptr, secure, name );
#endif
}
@ -419,7 +510,7 @@ public:
#ifdef TRACY_HAS_CALLSTACK #ifdef TRACY_HAS_CALLSTACK
auto ptr = Callstack( depth ); auto ptr = Callstack( depth );
TracyLfqPrepare( QueueType::Callstack ); TracyLfqPrepare( QueueType::Callstack );
MemWrite( &item->callstack.ptr, (uint64_t)ptr ); MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
TracyLfqCommit; TracyLfqCommit;
#endif #endif
} }
@ -445,12 +536,12 @@ public:
static bool ShouldExit(); static bool ShouldExit();
#ifdef TRACY_ON_DEMAND
tracy_force_inline bool IsConnected() const tracy_force_inline bool IsConnected() const
{ {
return m_isConnected.load( std::memory_order_acquire ); return m_isConnected.load( std::memory_order_acquire );
} }
#ifdef TRACY_ON_DEMAND
tracy_force_inline uint64_t ConnectionId() const tracy_force_inline uint64_t ConnectionId() const
{ {
return m_connectionId.load( std::memory_order_acquire ); return m_connectionId.load( std::memory_order_acquire );
@ -468,11 +559,16 @@ public:
void RequestShutdown() { m_shutdown.store( true, std::memory_order_relaxed ); m_shutdownManual.store( true, std::memory_order_relaxed ); } void RequestShutdown() { m_shutdown.store( true, std::memory_order_relaxed ); m_shutdownManual.store( true, std::memory_order_relaxed ); }
bool HasShutdownFinished() const { return m_shutdownFinished.load( std::memory_order_relaxed ); } bool HasShutdownFinished() const { return m_shutdownFinished.load( std::memory_order_relaxed ); }
void SendString( uint64_t ptr, const char* str, QueueType type ); void SendString( uint64_t str, const char* ptr, QueueType type ) { SendString( str, ptr, strlen( ptr ), type ); }
void SendString( uint64_t str, const char* ptr, size_t len, QueueType type );
// Convenience overload for NUL-terminated strings: measures `ptr` with strlen and
// forwards to the length-taking overload declared below.
void SendSingleString( const char* ptr ) { SendSingleString( ptr, strlen( ptr ) ); }
// Length-taking overload; defined outside this header.
void SendSingleString( const char* ptr, size_t len );
// Convenience overload for NUL-terminated strings: measures `ptr` with strlen and
// forwards to the length-taking overload declared below.
void SendSecondString( const char* ptr ) { SendSecondString( ptr, strlen( ptr ) ); }
// Length-taking overload; defined outside this header.
void SendSecondString( const char* ptr, size_t len );
// Allocated source location data layout: // Allocated source location data layout:
// 4b payload size // 2b payload size
// 4b color // 4b color
// 4b source line // 4b source line
// fsz function name // fsz function name
@ -483,30 +579,36 @@ public:
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function ) static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function )
{ {
const auto fsz = strlen( function ); return AllocSourceLocation( line, source, function, nullptr, 0 );
const auto ssz = strlen( source );
const uint32_t sz = uint32_t( 4 + 4 + 4 + fsz + 1 + ssz + 1 );
auto ptr = (char*)tracy_malloc( sz );
memcpy( ptr, &sz, 4 );
memset( ptr + 4, 0, 4 );
memcpy( ptr + 8, &line, 4 );
memcpy( ptr + 12, function, fsz+1 );
memcpy( ptr + 12 + fsz + 1, source, ssz + 1 );
return uint64_t( ptr );
} }
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz ) static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz )
{ {
const auto fsz = strlen( function ); return AllocSourceLocation( line, source, strlen(source), function, strlen(function), name, nameSz );
const auto ssz = strlen( source ); }
const uint32_t sz = uint32_t( 4 + 4 + 4 + fsz + 1 + ssz + 1 + nameSz );
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz )
{
return AllocSourceLocation( line, source, sourceSz, function, functionSz, nullptr, 0 );
}
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz )
{
const auto sz32 = uint32_t( 2 + 4 + 4 + functionSz + 1 + sourceSz + 1 + nameSz );
assert( sz32 <= std::numeric_limits<uint16_t>::max() );
const auto sz = uint16_t( sz32 );
auto ptr = (char*)tracy_malloc( sz ); auto ptr = (char*)tracy_malloc( sz );
memcpy( ptr, &sz, 4 ); memcpy( ptr, &sz, 2 );
memset( ptr + 4, 0, 4 ); memset( ptr + 2, 0, 4 );
memcpy( ptr + 8, &line, 4 ); memcpy( ptr + 6, &line, 4 );
memcpy( ptr + 12, function, fsz+1 ); memcpy( ptr + 10, function, functionSz );
memcpy( ptr + 12 + fsz + 1, source, ssz + 1 ); ptr[10 + functionSz] = '\0';
memcpy( ptr + 12 + fsz + 1 + ssz + 1, name, nameSz ); memcpy( ptr + 10 + functionSz + 1, source, sourceSz );
ptr[10 + functionSz + 1 + sourceSz] = '\0';
if( nameSz != 0 )
{
memcpy( ptr + 10 + functionSz + 1 + sourceSz + 1, name, nameSz );
}
return uint64_t( ptr ); return uint64_t( ptr );
} }
@ -575,14 +677,14 @@ private:
#ifdef TRACY_HAS_CALLSTACK #ifdef TRACY_HAS_CALLSTACK
auto item = GetProfiler().m_serialQueue.prepare_next(); auto item = GetProfiler().m_serialQueue.prepare_next();
MemWrite( &item->hdr.type, QueueType::CallstackMemory ); MemWrite( &item->hdr.type, QueueType::CallstackMemory );
MemWrite( &item->callstackMemory.ptr, (uint64_t)ptr ); MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
GetProfiler().m_serialQueue.commit_next(); GetProfiler().m_serialQueue.commit_next();
#endif #endif
} }
static tracy_force_inline void SendMemAlloc( QueueType type, const uint64_t thread, const void* ptr, size_t size ) static tracy_force_inline void SendMemAlloc( QueueType type, const uint64_t thread, const void* ptr, size_t size )
{ {
assert( type == QueueType::MemAlloc || type == QueueType::MemAllocCallstack ); assert( type == QueueType::MemAlloc || type == QueueType::MemAllocCallstack || type == QueueType::MemAllocNamed || type == QueueType::MemAllocCallstackNamed );
auto item = GetProfiler().m_serialQueue.prepare_next(); auto item = GetProfiler().m_serialQueue.prepare_next();
MemWrite( &item->hdr.type, type ); MemWrite( &item->hdr.type, type );
@ -605,7 +707,7 @@ private:
static tracy_force_inline void SendMemFree( QueueType type, const uint64_t thread, const void* ptr ) static tracy_force_inline void SendMemFree( QueueType type, const uint64_t thread, const void* ptr )
{ {
assert( type == QueueType::MemFree || type == QueueType::MemFreeCallstack ); assert( type == QueueType::MemFree || type == QueueType::MemFreeCallstack || type == QueueType::MemFreeNamed || type == QueueType::MemFreeCallstackNamed );
auto item = GetProfiler().m_serialQueue.prepare_next(); auto item = GetProfiler().m_serialQueue.prepare_next();
MemWrite( &item->hdr.type, type ); MemWrite( &item->hdr.type, type );
@ -615,6 +717,15 @@ private:
GetProfiler().m_serialQueue.commit_next(); GetProfiler().m_serialQueue.commit_next();
} }
// Queues a MemNamePayload item on the serial queue carrying `name`.
// Only the pointer value is stored in the item; the characters are not copied here.
// `name` must not be null. The visible callers invoke this while holding m_serialLock.
static tracy_force_inline void SendMemName( const char* name )
{
    assert( name );
    auto& serialQueue = GetProfiler().m_serialQueue;
    auto slot = serialQueue.prepare_next();
    MemWrite( &slot->hdr.type, QueueType::MemNamePayload );
    MemWrite( &slot->memName.name, (uint64_t)name );
    serialQueue.commit_next();
}
#if ( defined _WIN32 || defined __CYGWIN__ ) && defined TRACY_TIMER_QPC #if ( defined _WIN32 || defined __CYGWIN__ ) && defined TRACY_TIMER_QPC
static int64_t GetTimeQpc(); static int64_t GetTimeQpc();
#endif #endif
@ -655,8 +766,8 @@ private:
TracyMutex m_fiLock; TracyMutex m_fiLock;
std::atomic<uint64_t> m_frameCount; std::atomic<uint64_t> m_frameCount;
#ifdef TRACY_ON_DEMAND
std::atomic<bool> m_isConnected; std::atomic<bool> m_isConnected;
#ifdef TRACY_ON_DEMAND
std::atomic<uint64_t> m_connectionId; std::atomic<uint64_t> m_connectionId;
TracyMutex m_deferredLock; TracyMutex m_deferredLock;

116
deps/tracy/client/TracyRingBuffer.hpp vendored Normal file
View File

@ -0,0 +1,116 @@
namespace tracy
{

// Reader for a perf_event ring buffer that is memory-mapped from a perf event file
// descriptor. The mapping consists of one metadata page (perf_event_mmap_page)
// followed by `Size` bytes of sample data. `Size` must be a power of two.
//
// The object owns both the mapping and the descriptor and releases them on
// destruction. It is movable but not copyable; a moved-from object is invalid
// (IsValid() returns false) and owns nothing.
template<size_t Size>
class RingBuffer
{
    static_assert( Size != 0 && ( Size & ( Size - 1 ) ) == 0, "RingBuffer size must be a power of two" );

public:
    // Takes ownership of `fd` and maps its ring buffer.
    // If the mapping fails, `fd` is closed and the object is left invalid.
    RingBuffer( int fd )
        : m_metadata( nullptr )
        , m_buffer( nullptr )
        , m_mapSize( 0 )
        , m_fd( fd )
    {
        const auto pageSize = uint32_t( getpagesize() );
        assert( Size >= pageSize );

        m_mapSize = Size + pageSize;
        auto mapAddr = mmap( nullptr, m_mapSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 );
        // mmap reports failure with MAP_FAILED, never with a null pointer.
        if( mapAddr == MAP_FAILED )
        {
            m_fd = -1;
            close( fd );
            return;
        }
        m_metadata = (perf_event_mmap_page*)mapAddr;
        assert( m_metadata->data_offset == pageSize );
        m_buffer = ((char*)mapAddr) + pageSize;
    }

    ~RingBuffer()
    {
        Release();
    }

    RingBuffer( const RingBuffer& ) = delete;
    RingBuffer& operator=( const RingBuffer& ) = delete;

    // Takes over the mapping and descriptor of `other`, leaving it invalid.
    RingBuffer( RingBuffer&& other )
        : m_metadata( other.m_metadata )
        , m_buffer( other.m_buffer )
        , m_mapSize( other.m_mapSize )
        , m_fd( other.m_fd )
    {
        other.Reset();
    }

    // Releases the currently owned resources, then takes over those of `other`.
    RingBuffer& operator=( RingBuffer&& other )
    {
        if( this != &other )
        {
            Release();
            m_metadata = other.m_metadata;
            m_buffer = other.m_buffer;
            m_mapSize = other.m_mapSize;
            m_fd = other.m_fd;
            other.Reset();
        }
        return *this;
    }

    // True when the ring buffer has been mapped successfully and is still owned.
    bool IsValid() const { return m_metadata != nullptr; }

    // Enables the perf event associated with the descriptor.
    void Enable()
    {
        ioctl( m_fd, PERF_EVENT_IOC_ENABLE, 0 );
    }

    // True when the producer's head is ahead of our tail, i.e. unread data exists.
    bool HasData() const
    {
        const auto head = LoadHead();
        return head > m_metadata->data_tail;
    }

    // Copies `cnt` bytes located `offset` bytes past the current tail into `dst`,
    // handling wrap-around at the end of the data area. Does not consume the data.
    void Read( void* dst, uint64_t offset, uint64_t cnt )
    {
        auto src = ( m_metadata->data_tail + offset ) % Size;
        if( src + cnt <= Size )
        {
            memcpy( dst, m_buffer + src, cnt );
        }
        else
        {
            // The requested range wraps: copy the part up to the end, then the rest from the start.
            const auto s0 = Size - src;
            memcpy( dst, m_buffer + src, s0 );
            memcpy( (char*)dst + s0, m_buffer, cnt - s0 );
        }
    }

    // Consumes `cnt` bytes by moving the tail forward.
    void Advance( uint64_t cnt )
    {
        StoreTail( m_metadata->data_tail + cnt );
    }

    // Returns the cap_user_time_zero flag of the metadata page, which ConvertTimeToTsc requires.
    bool CheckTscCaps() const
    {
        return m_metadata->cap_user_time_zero;
    }

    // Converts a perf timestamp into a TSC value using time_zero, time_mult and
    // time_shift from the metadata page. Requires cap_user_time_zero.
    int64_t ConvertTimeToTsc( int64_t timestamp ) const
    {
        assert( m_metadata->cap_user_time_zero );
        const auto time = timestamp - m_metadata->time_zero;
        const auto quot = time / m_metadata->time_mult;
        const auto rem = time % m_metadata->time_mult;
        return ( quot << m_metadata->time_shift ) + ( rem << m_metadata->time_shift ) / m_metadata->time_mult;
    }

private:
    // Unmaps the buffer and closes the descriptor, if owned, and marks the object empty.
    void Release()
    {
        if( m_metadata ) munmap( m_metadata, m_mapSize );
        if( m_fd != -1 ) close( m_fd );
        Reset();
    }

    // Forgets the resources without releasing them.
    void Reset()
    {
        m_metadata = nullptr;
        m_buffer = nullptr;
        m_mapSize = 0;
        m_fd = -1;
    }

    // Acquire-load of the producer position, so sample data written before it is visible.
    uint64_t LoadHead() const
    {
        return std::atomic_load_explicit( (const volatile std::atomic<uint64_t>*)&m_metadata->data_head, std::memory_order_acquire );
    }

    // Release-store of the consumer position, so our reads complete before the space is handed back.
    void StoreTail( uint64_t tail )
    {
        std::atomic_store_explicit( (volatile std::atomic<uint64_t>*)&m_metadata->data_tail, tail, std::memory_order_release );
    }

    perf_event_mmap_page* m_metadata;   // first page of the mapping; nullptr when invalid
    char* m_buffer;                     // start of the data area (one page past m_metadata)
    size_t m_mapSize;                   // total mapped size: Size + one page
    int m_fd;                           // owned perf event descriptor; -1 when not owned
};

}

View File

@ -1,6 +1,7 @@
#ifndef __TRACYSCOPED_HPP__ #ifndef __TRACYSCOPED_HPP__
#define __TRACYSCOPED_HPP__ #define __TRACYSCOPED_HPP__
#include <limits>
#include <stdint.h> #include <stdint.h>
#include <string.h> #include <string.h>
@ -15,6 +16,11 @@ namespace tracy
class ScopedZone class ScopedZone
{ {
public: public:
ScopedZone( const ScopedZone& ) = delete;
ScopedZone( ScopedZone&& ) = delete;
ScopedZone& operator=( const ScopedZone& ) = delete;
ScopedZone& operator=( ScopedZone&& ) = delete;
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, bool is_active = true ) tracy_force_inline ScopedZone( const SourceLocationData* srcloc, bool is_active = true )
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() ) : m_active( is_active && GetProfiler().IsConnected() )
@ -43,12 +49,50 @@ public:
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
m_connectionId = GetProfiler().ConnectionId(); m_connectionId = GetProfiler().ConnectionId();
#endif #endif
GetProfiler().SendCallstack( depth );
TracyLfqPrepare( QueueType::ZoneBeginCallstack ); TracyLfqPrepare( QueueType::ZoneBeginCallstack );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() ); MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc ); MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
TracyLfqCommit; TracyLfqCommit;
}
// Begins a zone whose source location is built at run time from explicit
// (pointer, length) pairs instead of a static SourceLocationData.
//   line                 - source line number
//   source/sourceSz      - source file name and its length
//   function/functionSz  - function name and its length
//   name/nameSz          - zone name and its length
//   is_active            - when false the zone is inert and nothing is queued
// In TRACY_ON_DEMAND builds the zone is also inert while no connection is established.
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active = true )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
// Remember the connection this zone was opened on.
m_connectionId = GetProfiler().ConnectionId();
#endif
// TracyLfqPrepare/TracyLfqCommit bracket the writes to `item`; keep the statements between them in order.
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLoc );
// The source location is passed to the queue as a 64-bit value returned by AllocSourceLocation.
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyLfqCommit;
}
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active = true )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
m_connectionId = GetProfiler().ConnectionId();
#endif
GetProfiler().SendCallstack( depth ); GetProfiler().SendCallstack( depth );
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyLfqCommit;
} }
tracy_force_inline ~ScopedZone() tracy_force_inline ~ScopedZone()
@ -64,29 +108,31 @@ public:
tracy_force_inline void Text( const char* txt, size_t size ) tracy_force_inline void Text( const char* txt, size_t size )
{ {
assert( size < std::numeric_limits<uint16_t>::max() );
if( !m_active ) return; if( !m_active ) return;
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
if( GetProfiler().ConnectionId() != m_connectionId ) return; if( GetProfiler().ConnectionId() != m_connectionId ) return;
#endif #endif
auto ptr = (char*)tracy_malloc( size+1 ); auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, txt, size ); memcpy( ptr, txt, size );
ptr[size] = '\0';
TracyLfqPrepare( QueueType::ZoneText ); TracyLfqPrepare( QueueType::ZoneText );
MemWrite( &item->zoneText.text, (uint64_t)ptr ); MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
TracyLfqCommit; TracyLfqCommit;
} }
tracy_force_inline void Name( const char* txt, size_t size ) tracy_force_inline void Name( const char* txt, size_t size )
{ {
assert( size < std::numeric_limits<uint16_t>::max() );
if( !m_active ) return; if( !m_active ) return;
#ifdef TRACY_ON_DEMAND #ifdef TRACY_ON_DEMAND
if( GetProfiler().ConnectionId() != m_connectionId ) return; if( GetProfiler().ConnectionId() != m_connectionId ) return;
#endif #endif
auto ptr = (char*)tracy_malloc( size+1 ); auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, txt, size ); memcpy( ptr, txt, size );
ptr[size] = '\0';
TracyLfqPrepare( QueueType::ZoneName ); TracyLfqPrepare( QueueType::ZoneName );
MemWrite( &item->zoneText.text, (uint64_t)ptr ); MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
TracyLfqCommit; TracyLfqCommit;
} }

View File

@ -21,23 +21,28 @@
# include "../common/TracyAlloc.hpp" # include "../common/TracyAlloc.hpp"
# include "../common/TracySystem.hpp" # include "../common/TracySystem.hpp"
# include "TracyProfiler.hpp" # include "TracyProfiler.hpp"
# include "TracyThread.hpp"
namespace tracy namespace tracy
{ {
DEFINE_GUID ( /* ce1dbfb4-137e-4da6-87b0-3f59aa102cbc */ struct __declspec(uuid("{ce1dbfb4-137e-4da6-87b0-3f59aa102cbc}")) PERFINFOGUID;
PerfInfoGuid, static const auto PerfInfoGuid = __uuidof(PERFINFOGUID);
0xce1dbfb4,
0x137e, struct __declspec(uuid("{802EC45A-1E99-4B83-9920-87C98277BA9D}")) DXGKRNLGUID;
0x4da6, static const auto DxgKrnlGuid = __uuidof(DXGKRNLGUID);
0x87, 0xb0, 0x3f, 0x59, 0xaa, 0x10, 0x2c, 0xbc
);
static TRACEHANDLE s_traceHandle; static TRACEHANDLE s_traceHandle;
static TRACEHANDLE s_traceHandle2; static TRACEHANDLE s_traceHandle2;
static EVENT_TRACE_PROPERTIES* s_prop; static EVENT_TRACE_PROPERTIES* s_prop;
static DWORD s_pid; static DWORD s_pid;
// State of the vsync trace session; all of it is set up by SetupVsync().
// Properties block of the "TracyVsync" session (allocated with tracy_malloc).
static EVENT_TRACE_PROPERTIES* s_propVsync;
// Handle returned by StartTraceA for the "TracyVsync" session.
static TRACEHANDLE s_traceHandleVsync;
// Handle returned by OpenTraceA; consumed by ProcessTrace on the vsync thread.
static TRACEHANDLE s_traceHandleVsync2;
// Thread running ProcessTrace for vsync events; nullptr until SetupVsync() succeeds.
Thread* s_threadVsync = nullptr;
struct CSwitch struct CSwitch
{ {
uint32_t newThreadId; uint32_t newThreadId;
@ -85,6 +90,19 @@ struct StackWalkEvent
uint64_t stack[192]; uint64_t stack[192];
}; };
// Layout that EventRecordCallbackVsync applies to the UserData payload of a vsync
// event record. The member order and types define that layout and must not change.
// Only vidPnTargetId is read by the code in this file.
// NOTE(review): field meanings follow their names; they are not verified here.
struct VSyncInfo
{
void* dxgAdapter;
uint32_t vidPnTargetId;
uint64_t scannedPhysicalAddress;
uint32_t vidPnSourceId;
uint32_t frameNumber;
int64_t frameQpcTime;
void* hFlipDevice;
uint32_t flipType;
uint64_t flipFenceId;
};
#ifdef __CYGWIN__ #ifdef __CYGWIN__
extern "C" typedef DWORD (WINAPI *t_GetProcessIdOfThread)( HANDLE ); extern "C" typedef DWORD (WINAPI *t_GetProcessIdOfThread)( HANDLE );
extern "C" typedef DWORD (WINAPI *t_GetProcessImageFileNameA)( HANDLE, LPSTR, DWORD ); extern "C" typedef DWORD (WINAPI *t_GetProcessImageFileNameA)( HANDLE, LPSTR, DWORD );
@ -168,9 +186,9 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )
memcpy( trace, &sz, sizeof( uint64_t ) ); memcpy( trace, &sz, sizeof( uint64_t ) );
memcpy( trace+1, sw->stack, sizeof( uint64_t ) * sz ); memcpy( trace+1, sw->stack, sizeof( uint64_t ) * sz );
TracyLfqPrepare( QueueType::CallstackSample ); TracyLfqPrepare( QueueType::CallstackSample );
MemWrite( &item->callstackSample.time, sw->eventTimeStamp ); MemWrite( &item->callstackSampleFat.time, sw->eventTimeStamp );
MemWrite( &item->callstackSample.thread, (uint64_t)sw->stackThread ); MemWrite( &item->callstackSampleFat.thread, (uint64_t)sw->stackThread );
MemWrite( &item->callstackSample.ptr, (uint64_t)trace ); MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace );
TracyLfqCommit; TracyLfqCommit;
} }
} }
@ -181,6 +199,136 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )
} }
} }
// Frame names reported for vsync events. Entries 0..7 correspond to the slots of
// VsyncTarget; the last entry (index 8) is the fallback used when all slots are
// taken by other target ids.
static constexpr const char* VsyncName[] = {
"[0] Vsync",
"[1] Vsync",
"[2] Vsync",
"[3] Vsync",
"[4] Vsync",
"[5] Vsync",
"[6] Vsync",
"[7] Vsync",
"Vsync"
};
// Target ids seen so far, in order of first appearance. A value of 0 marks a free slot.
static uint32_t VsyncTarget[8] = {};
// Event callback of the vsync trace session: turns each vsync event into a
// FrameMarkMsg item whose name identifies the display target it came from.
// In TRACY_ON_DEMAND builds events are ignored while no connection is established.
void WINAPI EventRecordCallbackVsync( PEVENT_RECORD record )
{
#ifdef TRACY_ON_DEMAND
    if( !GetProfiler().IsConnected() ) return;
#endif

    const auto& hdr = record->EventHeader;
    assert( hdr.ProviderId.Data1 == 0x802EC45A );
    assert( hdr.EventDescriptor.Id == 0x0011 );

    const auto info = (const VSyncInfo*)record->UserData;
    const uint32_t targetId = info->vidPnTargetId;

    // Find the slot already assigned to this target, or claim the first free one.
    // If all eight slots belong to other targets, slot ends up as 8, which selects
    // the generic "Vsync" name.
    int slot = 0;
    for( ; slot < 8; slot++ )
    {
        if( VsyncTarget[slot] == 0 )
        {
            VsyncTarget[slot] = targetId;
            break;
        }
        if( VsyncTarget[slot] == targetId ) break;
    }

    TracyLfqPrepare( QueueType::FrameMarkMsg );
    MemWrite( &item->frameMark.time, hdr.TimeStamp.QuadPart );
    MemWrite( &item->frameMark.name, uint64_t( VsyncName[slot] ) );
    TracyLfqCommit;
}
// Starts a real-time ETW session named "TracyVsync", enables the DxgKrnl provider
// filtered down to event id 0x0011, and spawns a thread that feeds the events to
// EventRecordCallbackVsync. On any failure the function returns early and
// s_threadVsync stays as it was, which is how SysTraceStop tells whether vsync
// tracing is running. The whole body is compiled only when
// _WIN32_WINNT >= _WIN32_WINNT_WINBLUE; otherwise this function does nothing.
static void SetupVsync()
{
#if _WIN32_WINNT >= _WIN32_WINNT_WINBLUE
// The properties structure is followed by MAX_PATH bytes that hold the session name.
const auto psz = sizeof( EVENT_TRACE_PROPERTIES ) + MAX_PATH;
s_propVsync = (EVENT_TRACE_PROPERTIES*)tracy_malloc( psz );
memset( s_propVsync, 0, sizeof( EVENT_TRACE_PROPERTIES ) );
s_propVsync->LogFileMode = EVENT_TRACE_REAL_TIME_MODE;
s_propVsync->Wnode.BufferSize = psz;
// Timestamp source of the session (per the ETW documentation: 1 = QPC, 3 = CPU cycle counter).
#ifdef TRACY_TIMER_QPC
s_propVsync->Wnode.ClientContext = 1;
#else
s_propVsync->Wnode.ClientContext = 3;
#endif
s_propVsync->LoggerNameOffset = sizeof( EVENT_TRACE_PROPERTIES );
strcpy( ((char*)s_propVsync) + sizeof( EVENT_TRACE_PROPERTIES ), "TracyVsync" );
// Keep a pristine copy of the properties; they are restored after the stop request
// below (presumably because ControlTrace writes into the structure - TODO confirm).
auto backup = tracy_malloc( psz );
memcpy( backup, s_propVsync, psz );
// Stop a stale session of the same name, if any. "Not found" is the normal case.
const auto controlStatus = ControlTraceA( 0, "TracyVsync", s_propVsync, EVENT_TRACE_CONTROL_STOP );
if( controlStatus != ERROR_SUCCESS && controlStatus != ERROR_WMI_INSTANCE_NOT_FOUND )
{
tracy_free( backup );
tracy_free( s_propVsync );
return;
}
memcpy( s_propVsync, backup, psz );
tracy_free( backup );
const auto startStatus = StartTraceA( &s_traceHandleVsync, "TracyVsync", s_propVsync );
if( startStatus != ERROR_SUCCESS )
{
tracy_free( s_propVsync );
return;
}
// Event-id filter: let only event 0x0011 through.
EVENT_FILTER_EVENT_ID fe = {};
fe.FilterIn = TRUE;
fe.Count = 1;
fe.Events[0] = 0x0011; // VSyncDPC_Info
EVENT_FILTER_DESCRIPTOR desc = {};
desc.Ptr = (ULONGLONG)&fe;
desc.Size = sizeof( fe );
desc.Type = EVENT_FILTER_TYPE_EVENT_ID;
ENABLE_TRACE_PARAMETERS params = {};
params.Version = ENABLE_TRACE_PARAMETERS_VERSION_2;
params.EnableProperty = EVENT_ENABLE_PROPERTY_IGNORE_KEYWORD_0;
params.SourceId = s_propVsync->Wnode.Guid;
params.EnableFilterDesc = &desc;
params.FilterDescCount = 1;
uint64_t mask = 0x4000000000000001; // Microsoft_Windows_DxgKrnl_Performance | Base
// NOTE(review): the result of EnableTraceEx2 is not checked.
EnableTraceEx2( s_traceHandleVsync, &DxgKrnlGuid, EVENT_CONTROL_CODE_ENABLE_PROVIDER, TRACE_LEVEL_INFORMATION, mask, mask, 0, &params );
// Open the session for consumption; events are delivered to EventRecordCallbackVsync.
char loggerName[MAX_PATH];
strcpy( loggerName, "TracyVsync" );
EVENT_TRACE_LOGFILEA log = {};
log.LoggerName = loggerName;
log.ProcessTraceMode = PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_EVENT_RECORD | PROCESS_TRACE_MODE_RAW_TIMESTAMP;
log.EventRecordCallback = EventRecordCallbackVsync;
s_traceHandleVsync2 = OpenTraceA( &log );
if( s_traceHandleVsync2 == (TRACEHANDLE)INVALID_HANDLE_VALUE )
{
// NOTE(review): s_traceHandleVsync comes from StartTraceA, not OpenTraceA - confirm that
// CloseTrace is the intended way to end the session here.
CloseTrace( s_traceHandleVsync );
tracy_free( s_propVsync );
return;
}
// The worker thread is constructed in tracy_malloc'ed storage via placement new.
s_threadVsync = (Thread*)tracy_malloc( sizeof( Thread ) );
new(s_threadVsync) Thread( [] (void*) {
ThreadExitHandler threadExitHandler;
SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL );
SetThreadName( "Tracy Vsync" );
// Pumps events of the opened trace into the callback.
ProcessTrace( &s_traceHandleVsync2, 1, nullptr, nullptr );
}, nullptr );
#endif
}
bool SysTraceStart( int64_t& samplingPeriod ) bool SysTraceStart( int64_t& samplingPeriod )
{ {
if( !_GetThreadDescription ) _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" ); if( !_GetThreadDescription ) _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" );
@ -233,6 +381,8 @@ bool SysTraceStart( int64_t& samplingPeriod )
#endif #endif
s_prop->Wnode.Guid = SystemTraceControlGuid; s_prop->Wnode.Guid = SystemTraceControlGuid;
s_prop->BufferSize = 1024; s_prop->BufferSize = 1024;
s_prop->MinimumBuffers = std::thread::hardware_concurrency() * 4;
s_prop->MaximumBuffers = std::thread::hardware_concurrency() * 6;
s_prop->LoggerNameOffset = sizeof( EVENT_TRACE_PROPERTIES ); s_prop->LoggerNameOffset = sizeof( EVENT_TRACE_PROPERTIES );
memcpy( ((char*)s_prop) + sizeof( EVENT_TRACE_PROPERTIES ), KERNEL_LOGGER_NAME, sizeof( KERNEL_LOGGER_NAME ) ); memcpy( ((char*)s_prop) + sizeof( EVENT_TRACE_PROPERTIES ), KERNEL_LOGGER_NAME, sizeof( KERNEL_LOGGER_NAME ) );
@ -242,6 +392,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
const auto controlStatus = ControlTrace( 0, KERNEL_LOGGER_NAME, s_prop, EVENT_TRACE_CONTROL_STOP ); const auto controlStatus = ControlTrace( 0, KERNEL_LOGGER_NAME, s_prop, EVENT_TRACE_CONTROL_STOP );
if( controlStatus != ERROR_SUCCESS && controlStatus != ERROR_WMI_INSTANCE_NOT_FOUND ) if( controlStatus != ERROR_SUCCESS && controlStatus != ERROR_WMI_INSTANCE_NOT_FOUND )
{ {
tracy_free( backup );
tracy_free( s_prop ); tracy_free( s_prop );
return false; return false;
} }
@ -288,17 +439,29 @@ bool SysTraceStart( int64_t& samplingPeriod )
return false; return false;
} }
SetupVsync();
return true; return true;
} }
void SysTraceStop() void SysTraceStop()
{ {
if( s_threadVsync )
{
CloseTrace( s_traceHandleVsync2 );
CloseTrace( s_traceHandleVsync );
s_threadVsync->~Thread();
tracy_free( s_threadVsync );
}
CloseTrace( s_traceHandle2 ); CloseTrace( s_traceHandle2 );
CloseTrace( s_traceHandle ); CloseTrace( s_traceHandle );
} }
void SysTraceWorker( void* ptr ) void SysTraceWorker( void* ptr )
{ {
ThreadExitHandler threadExitHandler;
SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL );
SetThreadName( "Tracy SysTrace" ); SetThreadName( "Tracy SysTrace" );
ProcessTrace( &s_traceHandle2, 1, 0, 0 ); ProcessTrace( &s_traceHandle2, 1, 0, 0 );
ControlTrace( 0, KERNEL_LOGGER_NAME, s_prop, EVENT_TRACE_CONTROL_STOP ); ControlTrace( 0, KERNEL_LOGGER_NAME, s_prop, EVENT_TRACE_CONTROL_STOP );
@ -323,7 +486,7 @@ void SysTraceSendExternalName( uint64_t thread )
auto ret = wcstombs( buf, tmp, 256 ); auto ret = wcstombs( buf, tmp, 256 );
if( ret != 0 ) if( ret != 0 )
{ {
GetProfiler().SendString( thread, buf, QueueType::ExternalThreadName ); GetProfiler().SendString( thread, buf, ret, QueueType::ExternalThreadName );
threadSent = true; threadSent = true;
} }
} }
@ -351,9 +514,10 @@ void SysTraceSendExternalName( uint64_t thread )
if( (uint64_t)ptr >= (uint64_t)info.lpBaseOfDll && (uint64_t)ptr <= (uint64_t)info.lpBaseOfDll + (uint64_t)info.SizeOfImage ) if( (uint64_t)ptr >= (uint64_t)info.lpBaseOfDll && (uint64_t)ptr <= (uint64_t)info.lpBaseOfDll + (uint64_t)info.SizeOfImage )
{ {
char buf2[1024]; char buf2[1024];
if( _GetModuleBaseNameA( phnd, modules[i], buf2, 1024 ) != 0 ) const auto modlen = _GetModuleBaseNameA( phnd, modules[i], buf2, 1024 );
if( modlen != 0 )
{ {
GetProfiler().SendString( thread, buf2, QueueType::ExternalThreadName ); GetProfiler().SendString( thread, buf2, modlen, QueueType::ExternalThreadName );
threadSent = true; threadSent = true;
} }
} }
@ -367,7 +531,7 @@ void SysTraceSendExternalName( uint64_t thread )
CloseHandle( hnd ); CloseHandle( hnd );
if( !threadSent ) if( !threadSent )
{ {
GetProfiler().SendString( thread, "???", QueueType::ExternalThreadName ); GetProfiler().SendString( thread, "???", 3, QueueType::ExternalThreadName );
threadSent = true; threadSent = true;
} }
if( pid != 0 ) if( pid != 0 )
@ -381,7 +545,7 @@ void SysTraceSendExternalName( uint64_t thread )
} }
if( pid == 4 ) if( pid == 4 )
{ {
GetProfiler().SendString( thread, "System", QueueType::ExternalName ); GetProfiler().SendString( thread, "System", 6, QueueType::ExternalName );
return; return;
} }
else else
@ -407,9 +571,9 @@ void SysTraceSendExternalName( uint64_t thread )
if( !threadSent ) if( !threadSent )
{ {
GetProfiler().SendString( thread, "???", QueueType::ExternalThreadName ); GetProfiler().SendString( thread, "???", 3, QueueType::ExternalThreadName );
} }
GetProfiler().SendString( thread, "???", QueueType::ExternalName ); GetProfiler().SendString( thread, "???", 3, QueueType::ExternalName );
} }
} }
@ -428,8 +592,15 @@ void SysTraceSendExternalName( uint64_t thread )
# include <string.h> # include <string.h>
# include <unistd.h> # include <unistd.h>
# include <atomic> # include <atomic>
# include <thread>
# include <linux/perf_event.h>
# include <linux/version.h>
# include <sys/mman.h>
# include <sys/ioctl.h>
# include "TracyProfiler.hpp" # include "TracyProfiler.hpp"
# include "TracyRingBuffer.hpp"
# include "TracyThread.hpp"
# ifdef __ANDROID__ # ifdef __ANDROID__
# include "TracySysTracePayload.hpp" # include "TracySysTracePayload.hpp"
@ -449,6 +620,173 @@ static const char BufferSizeKb[] = "buffer_size_kb";
static const char TracePipe[] = "trace_pipe"; static const char TracePipe[] = "trace_pipe";
static std::atomic<bool> traceActive { false }; static std::atomic<bool> traceActive { false };
// State of the perf-based call stack sampling; all of it is set up by SetupSampling().
// Thread that drains the ring buffers; nullptr until SetupSampling() creates it.
static Thread* s_threadSampling = nullptr;
// Number of ring buffers in s_ring; taken from std::thread::hardware_concurrency().
static int s_numCpus = 0;
// Size of the data area of each ring buffer, in bytes (64 KiB).
static constexpr size_t RingBufSize = 64*1024;
// Array of s_numCpus ring buffers, one perf event descriptor each (allocated with tracy_malloc).
static RingBuffer<RingBufSize>* s_ring = nullptr;
// Thin wrapper that issues the perf_event_open system call directly through syscall()
// with the arguments passed through unchanged.
// Returns the value produced by syscall(), narrowed to int.
static int perf_event_open( struct perf_event_attr* hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags )
{
    const long result = syscall( __NR_perf_event_open, hw_event, pid, cpu, group_fd, flags );
    return (int)result;
}
// Starts call stack sampling based on perf_event_open.
//
// One software CPU-clock perf event (frequency mode, sample_freq = 10000) is
// opened per CPU with pid = -1, and each resulting fd is wrapped in a
// RingBuffer stored in s_ring. A dedicated "Tracy Sampling" thread then drains
// the ring buffers, keeps only the samples whose pid matches the current
// process, and queues the cleaned-up call chain as a CallstackSample item.
//
// samplingPeriod is set to 100*1000 as soon as CLOCK_MONOTONIC_RAW is
// available, even if opening the perf events fails afterwards.
// NOTE(review): presumably nanoseconds (matching the 10 kHz frequency) - confirm.
//
// If any perf_event_open call fails, all ring buffers created so far are
// destroyed, s_ring is freed and no sampling thread is started.
static void SetupSampling( int64_t& samplingPeriod )
{
#ifndef CLOCK_MONOTONIC_RAW
    return;
#endif

    samplingPeriod = 100*1000;

    s_numCpus = (int)std::thread::hardware_concurrency();
    s_ring = (RingBuffer<RingBufSize>*)tracy_malloc( sizeof( RingBuffer<RingBufSize> ) * s_numCpus );

    perf_event_attr pe = {};

    pe.type = PERF_TYPE_SOFTWARE;
    pe.size = sizeof( perf_event_attr );
    pe.config = PERF_COUNT_SW_CPU_CLOCK;

    pe.sample_freq = 10000;
    pe.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CALLCHAIN;
#if LINUX_VERSION_CODE >= KERNEL_VERSION( 4, 8, 0 )
    pe.sample_max_stack = 127;
#endif
    pe.exclude_callchain_kernel = 1;

    pe.disabled = 1;
    pe.freq = 1;
#if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
    pe.use_clockid = 1;
    pe.clockid = CLOCK_MONOTONIC_RAW;
#endif

    for( int i=0; i<s_numCpus; i++ )
    {
        const int fd = perf_event_open( &pe, -1, i, -1, 0 );
        if( fd == -1 )
        {
            // Roll back: only the first i ring buffers have been constructed.
            for( int j=0; j<i; j++ ) s_ring[j].~RingBuffer<RingBufSize>();
            tracy_free( s_ring );
            return;
        }
        new( s_ring+i ) RingBuffer<RingBufSize>( fd );
    }

    s_threadSampling = (Thread*)tracy_malloc( sizeof( Thread ) );
    new(s_threadSampling) Thread( [] (void*) {
        ThreadExitHandler threadExitHandler;
        SetThreadName( "Tracy Sampling" );
        sched_param sp = { 5 };
        pthread_setschedparam( pthread_self(), SCHED_FIFO, &sp );
        uint32_t currentPid = (uint32_t)getpid();
#if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
        for( int i=0; i<s_numCpus; i++ )
        {
            if( !s_ring[i].CheckTscCaps() )
            {
                for( int j=0; j<s_numCpus; j++ ) s_ring[j].~RingBuffer<RingBufSize>();
                tracy_free( s_ring );
                const char* err = "Tracy Profiler: sampling is disabled due to non-native scheduler clock. Are you running under a VM?";
                Profiler::MessageAppInfo( err, strlen( err ) );
                return;
            }
        }
#endif
        for( int i=0; i<s_numCpus; i++ ) s_ring[i].Enable();
        for(;;)
        {
            bool hadData = false;
            for( int i=0; i<s_numCpus; i++ )
            {
                if( !traceActive.load( std::memory_order_relaxed ) ) break;
                if( !s_ring[i].HasData() ) continue;
                hadData = true;

                perf_event_header hdr;
                s_ring[i].Read( &hdr, 0, sizeof( perf_event_header ) );
                if( hdr.type == PERF_RECORD_SAMPLE )
                {
                    // Record body for TID | TIME | CALLCHAIN:
                    // u32 pid, u32 tid, u64 time, u64 nr, u64 ips[nr]
                    uint32_t pid, tid;
                    uint64_t t0;
                    uint64_t cnt;

                    auto offset = sizeof( perf_event_header );
                    s_ring[i].Read( &pid, offset, sizeof( uint32_t ) );
                    if( pid == currentPid )
                    {
                        offset += sizeof( uint32_t );
                        s_ring[i].Read( &tid, offset, sizeof( uint32_t ) );
                        offset += sizeof( uint32_t );
                        s_ring[i].Read( &t0, offset, sizeof( uint64_t ) );
                        offset += sizeof( uint64_t );
                        s_ring[i].Read( &cnt, offset, sizeof( uint64_t ) );
                        offset += sizeof( uint64_t );

                        // An empty call chain carries nothing to report. It must also be
                        // skipped because the trimming loop below would otherwise read the
                        // uninitialized trace[0] slot and let cnt wrap around.
                        if( cnt > 0 )
                        {
                            // Slot 0 is reserved for the frame count, frames start at slot 1.
                            auto trace = (uint64_t*)tracy_malloc( ( 1 + cnt ) * sizeof( uint64_t ) );
                            s_ring[i].Read( trace+1, offset, sizeof( uint64_t ) * cnt );

                            // remove non-canonical pointers
                            // First drop trailing entries whose upper 17 bits are not all equal...
                            do
                            {
                                const auto test = (int64_t)trace[cnt];
                                const auto m1 = test >> 63;
                                const auto m2 = test >> 47;
                                if( m1 == m2 ) break;
                            }
                            while( --cnt > 0 );
                            // ...then zero out any such entries left in the middle.
                            for( uint64_t j=1; j<cnt; j++ )
                            {
                                const auto test = (int64_t)trace[j];
                                const auto m1 = test >> 63;
                                const auto m2 = test >> 47;
                                if( m1 != m2 ) trace[j] = 0;
                            }

                            // skip kernel frames
                            // (leading entries with the top bit set)
                            uint64_t j;
                            for( j=0; j<cnt; j++ )
                            {
                                if( (int64_t)trace[j+1] >= 0 ) break;
                            }
                            if( j == cnt )
                            {
                                // Nothing usable is left in this sample.
                                tracy_free( trace );
                            }
                            else
                            {
                                if( j > 0 )
                                {
                                    cnt -= j;
                                    memmove( trace+1, trace+1+j, sizeof( uint64_t ) * cnt );
                                }
                                memcpy( trace, &cnt, sizeof( uint64_t ) );

#if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
                                t0 = s_ring[i].ConvertTimeToTsc( t0 );
#endif

                                // The buffer pointer is handed over through the queue item.
                                // NOTE(review): presumably released by the queue consumer - confirm.
                                TracyLfqPrepare( QueueType::CallstackSample );
                                MemWrite( &item->callstackSampleFat.time, t0 );
                                MemWrite( &item->callstackSampleFat.thread, (uint64_t)tid );
                                MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace );
                                TracyLfqCommit;
                            }
                        }
                    }
                }
                s_ring[i].Advance( hdr.size );
            }
            // The break above only leaves the per-CPU loop; leave the polling loop
            // as well, so the thread can finish and the cleanup below is reached.
            if( !traceActive.load( std::memory_order_relaxed ) ) break;
            if( !hadData )
            {
                std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
            }
        }

        for( int i=0; i<s_numCpus; i++ ) s_ring[i].~RingBuffer<RingBufSize>();
        tracy_free( s_ring );
    }, nullptr );
}
#ifdef __ANDROID__ #ifdef __ANDROID__
static bool TraceWrite( const char* path, size_t psz, const char* val, size_t vsz ) static bool TraceWrite( const char* path, size_t psz, const char* val, size_t vsz )
@ -525,6 +863,10 @@ void SysTraceInjectPayload()
bool SysTraceStart( int64_t& samplingPeriod ) bool SysTraceStart( int64_t& samplingPeriod )
{ {
#ifndef CLOCK_MONOTONIC_RAW
return false;
#endif
if( !TraceWrite( TracingOn, sizeof( TracingOn ), "0", 2 ) ) return false; if( !TraceWrite( TracingOn, sizeof( TracingOn ), "0", 2 ) ) return false;
if( !TraceWrite( CurrentTracer, sizeof( CurrentTracer ), "nop", 4 ) ) return false; if( !TraceWrite( CurrentTracer, sizeof( CurrentTracer ), "nop", 4 ) ) return false;
TraceWrite( TraceOptions, sizeof( TraceOptions ), "norecord-cmd", 13 ); TraceWrite( TraceOptions, sizeof( TraceOptions ), "norecord-cmd", 13 );
@ -533,12 +875,12 @@ bool SysTraceStart( int64_t& samplingPeriod )
TraceWrite( TraceOptions, sizeof( TraceOptions ), "noannotate", 11 ); TraceWrite( TraceOptions, sizeof( TraceOptions ), "noannotate", 11 );
#if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) #if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
if( !TraceWrite( TraceClock, sizeof( TraceClock ), "x86-tsc", 8 ) ) return false; if( !TraceWrite( TraceClock, sizeof( TraceClock ), "x86-tsc", 8 ) ) return false;
#elif __ARM_ARCH >= 6 #else
if( !TraceWrite( TraceClock, sizeof( TraceClock ), "mono_raw", 9 ) ) return false; if( !TraceWrite( TraceClock, sizeof( TraceClock ), "mono_raw", 9 ) ) return false;
#endif #endif
if( !TraceWrite( SchedSwitch, sizeof( SchedSwitch ), "1", 2 ) ) return false; if( !TraceWrite( SchedSwitch, sizeof( SchedSwitch ), "1", 2 ) ) return false;
if( !TraceWrite( SchedWakeup, sizeof( SchedWakeup ), "1", 2 ) ) return false; if( !TraceWrite( SchedWakeup, sizeof( SchedWakeup ), "1", 2 ) ) return false;
if( !TraceWrite( BufferSizeKb, sizeof( BufferSizeKb ), "512", 4 ) ) return false; if( !TraceWrite( BufferSizeKb, sizeof( BufferSizeKb ), "4096", 5 ) ) return false;
#if defined __ANDROID__ && ( defined __aarch64__ || defined __ARM_ARCH ) #if defined __ANDROID__ && ( defined __aarch64__ || defined __ARM_ARCH )
SysTraceInjectPayload(); SysTraceInjectPayload();
@ -547,6 +889,8 @@ bool SysTraceStart( int64_t& samplingPeriod )
if( !TraceWrite( TracingOn, sizeof( TracingOn ), "1", 2 ) ) return false; if( !TraceWrite( TracingOn, sizeof( TracingOn ), "1", 2 ) ) return false;
traceActive.store( true, std::memory_order_relaxed ); traceActive.store( true, std::memory_order_relaxed );
SetupSampling( samplingPeriod );
return true; return true;
} }
@ -554,23 +898,27 @@ void SysTraceStop()
{ {
TraceWrite( TracingOn, sizeof( TracingOn ), "0", 2 ); TraceWrite( TracingOn, sizeof( TracingOn ), "0", 2 );
traceActive.store( false, std::memory_order_relaxed ); traceActive.store( false, std::memory_order_relaxed );
if( s_threadSampling )
{
s_threadSampling->~Thread();
tracy_free( s_threadSampling );
}
} }
static uint64_t ReadNumber( const char*& ptr ) static uint64_t ReadNumber( const char*& data )
{ {
uint64_t val = 0; auto ptr = data;
assert( *ptr >= '0' && *ptr <= '9' );
uint64_t val = *ptr++ - '0';
for(;;) for(;;)
{ {
if( *ptr >= '0' && *ptr <= '9' ) const uint8_t v = uint8_t( *ptr - '0' );
{ if( v > 9 ) break;
val = val * 10 + ( *ptr - '0' ); val = val * 10 + v;
ptr++; ptr++;
}
else
{
return val;
}
} }
data = ptr;
return val;
} }
static uint8_t ReadState( char state ) static uint8_t ReadState( char state )
@ -674,7 +1022,7 @@ static void HandleTraceLine( const char* line )
#if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) #if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
const auto time = ReadNumber( line ); const auto time = ReadNumber( line );
#elif __ARM_ARCH >= 6 #else
const auto ts = ReadNumber( line ); const auto ts = ReadNumber( line );
line++; // '.' line++; // '.'
const auto tus = ReadNumber( line ); const auto tus = ReadNumber( line );
@ -774,19 +1122,16 @@ static void ProcessTraceLines( int fd )
line = buf; line = buf;
for(;;) for(;;)
{ {
auto next = line; auto next = (char*)memchr( line, '\n', end - line );
while( next < end && *next != '\n' ) next++; if( !next )
next++;
if( next >= end )
{ {
const auto lsz = end - line; const auto lsz = end - line;
memmove( buf, line, lsz ); memmove( buf, line, lsz );
line = buf + lsz; line = buf + lsz;
break; break;
} }
HandleTraceLine( line ); HandleTraceLine( line );
line = next; line = ++next;
} }
if( rd < 64*1024 ) if( rd < 64*1024 )
{ {
@ -799,6 +1144,7 @@ static void ProcessTraceLines( int fd )
void SysTraceWorker( void* ptr ) void SysTraceWorker( void* ptr )
{ {
ThreadExitHandler threadExitHandler;
SetThreadName( "Tracy SysTrace" ); SetThreadName( "Tracy SysTrace" );
int pipefd[2]; int pipefd[2];
if( pipe( pipefd ) == 0 ) if( pipe( pipefd ) == 0 )
@ -812,6 +1158,8 @@ void SysTraceWorker( void* ptr )
if( dup2( pipefd[1], STDOUT_FILENO ) >= 0 ) if( dup2( pipefd[1], STDOUT_FILENO ) >= 0 )
{ {
close( pipefd[1] ); close( pipefd[1] );
sched_param sp = { 4 };
pthread_setschedparam( pthread_self(), SCHED_FIFO, &sp );
#if defined __ANDROID__ && ( defined __aarch64__ || defined __ARM_ARCH ) #if defined __ANDROID__ && ( defined __aarch64__ || defined __ARM_ARCH )
execlp( "su", "su", "-c", "/data/tracy_systrace", (char*)nullptr ); execlp( "su", "su", "-c", "/data/tracy_systrace", (char*)nullptr );
#endif #endif
@ -823,6 +1171,8 @@ void SysTraceWorker( void* ptr )
{ {
// parent // parent
close( pipefd[1] ); close( pipefd[1] );
sched_param sp = { 5 };
pthread_setschedparam( pthread_self(), SCHED_FIFO, &sp );
ProcessTraceLines( pipefd[0] ); ProcessTraceLines( pipefd[0] );
close( pipefd[0] ); close( pipefd[0] );
} }
@ -856,14 +1206,10 @@ static void ProcessTraceLines( int fd )
const auto end = buf + rd; const auto end = buf + rd;
for(;;) for(;;)
{ {
auto next = line; auto next = (char*)memchr( line, '\n', end - line );
while( next < end && *next != '\n' ) next++; if( !next ) break;
if( next == end ) break;
assert( *next == '\n' );
next++;
HandleTraceLine( line ); HandleTraceLine( line );
line = next; line = ++next;
} }
} }
@ -872,6 +1218,7 @@ static void ProcessTraceLines( int fd )
void SysTraceWorker( void* ptr ) void SysTraceWorker( void* ptr )
{ {
ThreadExitHandler threadExitHandler;
SetThreadName( "Tracy SysTrace" ); SetThreadName( "Tracy SysTrace" );
char tmp[256]; char tmp[256];
memcpy( tmp, BasePath, sizeof( BasePath ) - 1 ); memcpy( tmp, BasePath, sizeof( BasePath ) - 1 );
@ -879,6 +1226,8 @@ void SysTraceWorker( void* ptr )
int fd = open( tmp, O_RDONLY ); int fd = open( tmp, O_RDONLY );
if( fd < 0 ) return; if( fd < 0 ) return;
sched_param sp = { 5 };
pthread_setschedparam( pthread_self(), SCHED_FIFO, &sp );
ProcessTraceLines( fd ); ProcessTraceLines( fd );
close( fd ); close( fd );
} }
@ -900,7 +1249,7 @@ void SysTraceSendExternalName( uint64_t thread )
} }
else else
{ {
GetProfiler().SendString( thread, "???", QueueType::ExternalThreadName ); GetProfiler().SendString( thread, "???", 3, QueueType::ExternalThreadName );
} }
sprintf( fn, "/proc/%" PRIu64 "/status", thread ); sprintf( fn, "/proc/%" PRIu64 "/status", thread );
@ -909,7 +1258,7 @@ void SysTraceSendExternalName( uint64_t thread )
{ {
int pid = -1; int pid = -1;
size_t lsz = 1024; size_t lsz = 1024;
auto line = (char*)malloc( lsz ); auto line = (char*)tracy_malloc( lsz );
for(;;) for(;;)
{ {
auto rd = getline( &line, &lsz, f ); auto rd = getline( &line, &lsz, f );
@ -920,7 +1269,7 @@ void SysTraceSendExternalName( uint64_t thread )
break; break;
} }
} }
free( line ); tracy_free( line );
fclose( f ); fclose( f );
if( pid >= 0 ) if( pid >= 0 )
{ {
@ -944,7 +1293,7 @@ void SysTraceSendExternalName( uint64_t thread )
} }
} }
} }
GetProfiler().SendString( thread, "???", QueueType::ExternalName ); GetProfiler().SendString( thread, "???", 3, QueueType::ExternalName );
} }
} }

View File

@ -7,9 +7,24 @@
# include <pthread.h> # include <pthread.h>
#endif #endif
#ifdef TRACY_MANUAL_LIFETIME
# include "tracy_rpmalloc.hpp"
#endif
namespace tracy namespace tracy
{ {
// Scope guard placed at the top of profiler thread entry points. Its
// destructor calls rpmalloc_thread_finalize() when TRACY_MANUAL_LIFETIME is
// defined and does nothing otherwise.
class ThreadExitHandler
{
public:
#ifdef TRACY_MANUAL_LIFETIME
    ~ThreadExitHandler() { rpmalloc_thread_finalize(); }
#else
    ~ThreadExitHandler() {}
#endif
};
#if defined _WIN32 || defined __CYGWIN__ #if defined _WIN32 || defined __CYGWIN__
class Thread class Thread

View File

@ -62,24 +62,6 @@
namespace tracy namespace tracy
{ {
// Exceptions
#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED
#if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__))
#define MOODYCAMEL_EXCEPTIONS_ENABLED
#endif
#endif
#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
#define MOODYCAMEL_TRY try
#define MOODYCAMEL_CATCH(...) catch(__VA_ARGS__)
#define MOODYCAMEL_RETHROW throw
#define MOODYCAMEL_THROW(expr) throw (expr)
#else
#define MOODYCAMEL_TRY if (true)
#define MOODYCAMEL_CATCH(...) else if (false)
#define MOODYCAMEL_RETHROW
#define MOODYCAMEL_THROW(expr)
#endif
#ifndef MOODYCAMEL_NOEXCEPT #ifndef MOODYCAMEL_NOEXCEPT
#if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED) #if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED)
#define MOODYCAMEL_NOEXCEPT #define MOODYCAMEL_NOEXCEPT

View File

@ -1372,7 +1372,7 @@ _memory_allocate_heap(void) {
heap = (heap_t*)_memory_map((1 + (sizeof(heap_t) >> _memory_page_size_shift)) * _memory_page_size, &align_offset); heap = (heap_t*)_memory_map((1 + (sizeof(heap_t) >> _memory_page_size_shift)) * _memory_page_size, &align_offset);
if (!heap) if (!heap)
return heap; return heap;
memset(heap, 0, sizeof(heap_t)); memset((char*)heap, 0, sizeof(heap_t));
heap->align_offset = align_offset; heap->align_offset = align_offset;
//Get a new heap ID //Get a new heap ID

View File

@ -9,8 +9,8 @@ namespace tracy
constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; } constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; }
enum : uint32_t { ProtocolVersion = 35 }; enum : uint32_t { ProtocolVersion = 42 };
enum : uint32_t { BroadcastVersion = 1 }; enum : uint16_t { BroadcastVersion = 2 };
using lz4sz_t = uint32_t; using lz4sz_t = uint32_t;
@ -87,6 +87,7 @@ struct WelcomeMessage
uint8_t onDemand; uint8_t onDemand;
uint8_t isApple; uint8_t isApple;
uint8_t cpuArch; uint8_t cpuArch;
uint8_t codeTransfer;
char cpuManufacturer[12]; char cpuManufacturer[12];
uint32_t cpuId; uint32_t cpuId;
char programName[WelcomeMessageProgramNameSize]; char programName[WelcomeMessageProgramNameSize];
@ -107,10 +108,10 @@ enum { OnDemandPayloadMessageSize = sizeof( OnDemandPayloadMessage ) };
struct BroadcastMessage struct BroadcastMessage
{ {
uint32_t broadcastVersion; uint16_t broadcastVersion;
uint16_t listenPort;
uint32_t protocolVersion; uint32_t protocolVersion;
uint32_t listenPort; int32_t activeTime; // in seconds
uint32_t activeTime; // in seconds
char programName[WelcomeMessageProgramNameSize]; char programName[WelcomeMessageProgramNameSize];
}; };

View File

@ -16,19 +16,12 @@ enum class QueueType : uint8_t
MessageColorCallstack, MessageColorCallstack,
MessageAppInfo, MessageAppInfo,
ZoneBeginAllocSrcLoc, ZoneBeginAllocSrcLoc,
ZoneBeginAllocSrcLocLean,
ZoneBeginAllocSrcLocCallstack, ZoneBeginAllocSrcLocCallstack,
ZoneBeginAllocSrcLocCallstackLean,
CallstackMemory, CallstackMemory,
CallstackMemoryLean,
Callstack, Callstack,
CallstackLean,
CallstackAlloc, CallstackAlloc,
CallstackAllocLean,
CallstackSample, CallstackSample,
CallstackSampleLean,
FrameImage, FrameImage,
FrameImageLean,
ZoneBegin, ZoneBegin,
ZoneBeginCallstack, ZoneBeginCallstack,
ZoneEnd, ZoneEnd,
@ -40,9 +33,13 @@ enum class QueueType : uint8_t
LockSharedRelease, LockSharedRelease,
LockName, LockName,
MemAlloc, MemAlloc,
MemAllocNamed,
MemFree, MemFree,
MemFreeNamed,
MemAllocCallstack, MemAllocCallstack,
MemAllocCallstackNamed,
MemFreeCallstack, MemFreeCallstack,
MemFreeCallstackNamed,
GpuZoneBegin, GpuZoneBegin,
GpuZoneBeginCallstack, GpuZoneBeginCallstack,
GpuZoneEnd, GpuZoneEnd,
@ -56,6 +53,7 @@ enum class QueueType : uint8_t
Terminate, Terminate,
KeepAlive, KeepAlive,
ThreadContext, ThreadContext,
GpuCalibration,
Crash, Crash,
CrashReport, CrashReport,
ZoneValidation, ZoneValidation,
@ -82,9 +80,11 @@ enum class QueueType : uint8_t
ParamSetup, ParamSetup,
ParamPingback, ParamPingback,
CpuTopology, CpuTopology,
SingleStringData,
SecondStringData,
MemNamePayload,
StringData, StringData,
ThreadName, ThreadName,
CustomStringData,
PlotName, PlotName,
SourceLocationPayload, SourceLocationPayload,
CallstackPayload, CallstackPayload,
@ -140,15 +140,15 @@ struct QueueFrameMark
uint64_t name; // ptr uint64_t name; // ptr
}; };
struct QueueFrameImageLean struct QueueFrameImage
{ {
uint64_t frame; uint32_t frame;
uint16_t w; uint16_t w;
uint16_t h; uint16_t h;
uint8_t flip; uint8_t flip;
}; };
struct QueueFrameImage : public QueueFrameImageLean struct QueueFrameImageFat : public QueueFrameImage
{ {
uint64_t image; // ptr uint64_t image; // ptr
}; };
@ -164,9 +164,10 @@ struct QueueSourceLocation
uint8_t b; uint8_t b;
}; };
struct QueueZoneText struct QueueZoneTextFat
{ {
uint64_t text; // ptr uint64_t text; // ptr
uint16_t size;
}; };
enum class LockType : uint8_t enum class LockType : uint8_t
@ -187,7 +188,6 @@ struct QueueLockTerminate
{ {
uint32_t id; uint32_t id;
int64_t time; int64_t time;
LockType type;
}; };
struct QueueLockWait struct QueueLockWait
@ -195,7 +195,6 @@ struct QueueLockWait
uint64_t thread; uint64_t thread;
uint32_t id; uint32_t id;
int64_t time; int64_t time;
LockType type;
}; };
struct QueueLockObtain struct QueueLockObtain
@ -222,7 +221,12 @@ struct QueueLockMark
struct QueueLockName struct QueueLockName
{ {
uint32_t id; uint32_t id;
};
struct QueueLockNameFat : public QueueLockName
{
uint64_t name; // ptr uint64_t name; // ptr
uint16_t size;
}; };
enum class PlotDataType : uint8_t enum class PlotDataType : uint8_t
@ -248,7 +252,6 @@ struct QueuePlotData
struct QueueMessage struct QueueMessage
{ {
int64_t time; int64_t time;
uint64_t text; // ptr
}; };
struct QueueMessageColor : public QueueMessage struct QueueMessageColor : public QueueMessage
@ -258,6 +261,28 @@ struct QueueMessageColor : public QueueMessage
uint8_t b; uint8_t b;
}; };
// QueueMessage extended with a pointer to the message text and no length
// field. Its size is used for the MessageLiteral entries of QueueDataSize.
// NOTE(review): presumably the text is a string literal with static lifetime - confirm.
struct QueueMessageLiteral : public QueueMessage
{
uint64_t text; // ptr
};
// QueueMessageColor extended with a pointer to the message text and no length
// field. Its size is used for the MessageColorLiteral entries of QueueDataSize.
struct QueueMessageColorLiteral : public QueueMessageColor
{
uint64_t text; // ptr
};
// QueueMessage extended with a pointer to the message text plus a 16-bit size.
// NOTE(review): presumably the in-process queue form only; the QueueDataSize
// entries for plain messages use sizeof( QueueMessage ) - confirm.
struct QueueMessageFat : public QueueMessage
{
uint64_t text; // ptr
uint16_t size;
};
// QueueMessageColor extended with a pointer to the message text plus a 16-bit size.
// NOTE(review): presumably the in-process queue form only; the QueueDataSize
// entries for colored messages use sizeof( QueueMessageColor ) - confirm.
struct QueueMessageColorFat : public QueueMessageColor
{
uint64_t text; // ptr
uint16_t size;
};
// Don't change order, only add new entries at the end, this is also used on trace dumps! // Don't change order, only add new entries at the end, this is also used on trace dumps!
enum class GpuContextType : uint8_t enum class GpuContextType : uint8_t
{ {
@ -268,6 +293,11 @@ enum class GpuContextType : uint8_t
Direct3D12 Direct3D12
}; };
// Bit flags with uint8_t storage; this is the type of QueueGpuNewContext::flags.
enum GpuContextFlags : uint8_t
{
// NOTE(review): presumably set when the context emits GpuCalibration events - confirm.
GpuContextCalibration = 1 << 0
};
struct QueueGpuNewContext struct QueueGpuNewContext
{ {
int64_t cpuTime; int64_t cpuTime;
@ -275,7 +305,7 @@ struct QueueGpuNewContext
uint64_t thread; uint64_t thread;
float period; float period;
uint8_t context; uint8_t context;
uint8_t accuracyBits; GpuContextFlags flags;
GpuContextType type; GpuContextType type;
}; };
@ -303,6 +333,19 @@ struct QueueGpuTime
uint8_t context; uint8_t context;
}; };
// Payload of QueueType::GpuCalibration items (see the matching QueueDataSize entry).
// NOTE(review): gpuTime/cpuTime presumably form a paired GPU/CPU timestamp and
// cpuDelta the CPU time elapsed since the previous calibration - confirm.
struct QueueGpuCalibration
{
int64_t gpuTime;
int64_t cpuTime;
int64_t cpuDelta;
uint8_t context;
};
// Payload of QueueType::MemNamePayload items (see the matching QueueDataSize entry).
struct QueueMemNamePayload
{
// NOTE(review): presumably a pointer to the name used by the *Named memory events - confirm.
uint64_t name;
};
struct QueueMemAlloc struct QueueMemAlloc
{ {
int64_t time; int64_t time;
@ -318,29 +361,24 @@ struct QueueMemFree
uint64_t ptr; uint64_t ptr;
}; };
struct QueueCallstackMemory struct QueueCallstackFat
{ {
uint64_t ptr; uint64_t ptr;
}; };
struct QueueCallstack struct QueueCallstackAllocFat
{
uint64_t ptr;
};
struct QueueCallstackAlloc
{ {
uint64_t ptr; uint64_t ptr;
uint64_t nativePtr; uint64_t nativePtr;
}; };
struct QueueCallstackSampleLean struct QueueCallstackSample
{ {
int64_t time; int64_t time;
uint64_t thread; uint64_t thread;
}; };
struct QueueCallstackSample : public QueueCallstackSampleLean struct QueueCallstackSampleFat : public QueueCallstackSample
{ {
uint64_t ptr; uint64_t ptr;
}; };
@ -349,21 +387,17 @@ struct QueueCallstackFrameSize
{ {
uint64_t ptr; uint64_t ptr;
uint8_t size; uint8_t size;
uint64_t imageName;
}; };
struct QueueCallstackFrame struct QueueCallstackFrame
{ {
uint64_t name;
uint64_t file;
uint32_t line; uint32_t line;
uint64_t symAddr; uint64_t symAddr;
char symLen[3]; uint32_t symLen;
}; };
struct QueueSymbolInformation struct QueueSymbolInformation
{ {
uint64_t file;
uint32_t line; uint32_t line;
uint64_t symAddr; uint64_t symAddr;
}; };
@ -371,7 +405,6 @@ struct QueueSymbolInformation
struct QueueCodeInformation struct QueueCodeInformation
{ {
uint64_t ptr; uint64_t ptr;
uint64_t file;
uint32_t line; uint32_t line;
}; };
@ -460,9 +493,9 @@ struct QueueItem
QueueStringTransfer stringTransfer; QueueStringTransfer stringTransfer;
QueueFrameMark frameMark; QueueFrameMark frameMark;
QueueFrameImage frameImage; QueueFrameImage frameImage;
QueueFrameImage frameImageLean; QueueFrameImageFat frameImageFat;
QueueSourceLocation srcloc; QueueSourceLocation srcloc;
QueueZoneText zoneText; QueueZoneTextFat zoneTextFat;
QueueLockAnnounce lockAnnounce; QueueLockAnnounce lockAnnounce;
QueueLockTerminate lockTerminate; QueueLockTerminate lockTerminate;
QueueLockWait lockWait; QueueLockWait lockWait;
@ -470,20 +503,26 @@ struct QueueItem
QueueLockRelease lockRelease; QueueLockRelease lockRelease;
QueueLockMark lockMark; QueueLockMark lockMark;
QueueLockName lockName; QueueLockName lockName;
QueueLockNameFat lockNameFat;
QueuePlotData plotData; QueuePlotData plotData;
QueueMessage message; QueueMessage message;
QueueMessageColor messageColor; QueueMessageColor messageColor;
QueueMessageLiteral messageLiteral;
QueueMessageColorLiteral messageColorLiteral;
QueueMessageFat messageFat;
QueueMessageColorFat messageColorFat;
QueueGpuNewContext gpuNewContext; QueueGpuNewContext gpuNewContext;
QueueGpuZoneBegin gpuZoneBegin; QueueGpuZoneBegin gpuZoneBegin;
QueueGpuZoneEnd gpuZoneEnd; QueueGpuZoneEnd gpuZoneEnd;
QueueGpuTime gpuTime; QueueGpuTime gpuTime;
QueueGpuCalibration gpuCalibration;
QueueMemAlloc memAlloc; QueueMemAlloc memAlloc;
QueueMemFree memFree; QueueMemFree memFree;
QueueCallstackMemory callstackMemory; QueueMemNamePayload memName;
QueueCallstack callstack; QueueCallstackFat callstackFat;
QueueCallstackAlloc callstackAlloc; QueueCallstackAllocFat callstackAllocFat;
QueueCallstackSample callstackSample; QueueCallstackSample callstackSample;
QueueCallstackSampleLean callstackSampleLean; QueueCallstackSampleFat callstackSampleFat;
QueueCallstackFrameSize callstackFrameSize; QueueCallstackFrameSize callstackFrameSize;
QueueCallstackFrame callstackFrame; QueueCallstackFrame callstackFrame;
QueueSymbolInformation symbolInformation; QueueSymbolInformation symbolInformation;
@ -504,27 +543,20 @@ struct QueueItem
enum { QueueItemSize = sizeof( QueueItem ) }; enum { QueueItemSize = sizeof( QueueItem ) };
static constexpr size_t QueueDataSize[] = { static constexpr size_t QueueDataSize[] = {
sizeof( QueueHeader ) + sizeof( QueueZoneText ), sizeof( QueueHeader ), // zone text
sizeof( QueueHeader ) + sizeof( QueueZoneText ), // zone name sizeof( QueueHeader ), // zone name
sizeof( QueueHeader ) + sizeof( QueueMessage ), sizeof( QueueHeader ) + sizeof( QueueMessage ),
sizeof( QueueHeader ) + sizeof( QueueMessageColor ), sizeof( QueueHeader ) + sizeof( QueueMessageColor ),
sizeof( QueueHeader ) + sizeof( QueueMessage ), // callstack sizeof( QueueHeader ) + sizeof( QueueMessage ), // callstack
sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // callstack sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // callstack
sizeof( QueueHeader ) + sizeof( QueueMessage ), // app info sizeof( QueueHeader ) + sizeof( QueueMessage ), // app info
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // allocated source location, not for network transfer sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // allocated source location
sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // lean allocated source location sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // allocated source location, callstack
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // allocated source location, callstack, not for network transfer sizeof( QueueHeader ), // callstack memory
sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // lean allocated source location, callstack sizeof( QueueHeader ), // callstack
sizeof( QueueHeader ) + sizeof( QueueCallstackMemory ), // not for network transfer sizeof( QueueHeader ), // callstack alloc
sizeof( QueueHeader ), // lean callstack memory sizeof( QueueHeader ) + sizeof( QueueCallstackSample ),
sizeof( QueueHeader ) + sizeof( QueueCallstack ), // not for network transfer sizeof( QueueHeader ) + sizeof( QueueFrameImage ),
sizeof( QueueHeader ), // lean callstack
sizeof( QueueHeader ) + sizeof( QueueCallstackAlloc ), // not for network transfer
sizeof( QueueHeader ), // lean callstack alloc
sizeof( QueueHeader ) + sizeof( QueueCallstackSample ), // not for network transfer
sizeof( QueueHeader ) + sizeof( QueueCallstackSampleLean ),
sizeof( QueueHeader ) + sizeof( QueueFrameImage ), // not for network transfer
sizeof( QueueHeader ) + sizeof( QueueFrameImageLean ),
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), sizeof( QueueHeader ) + sizeof( QueueZoneBegin ),
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // callstack sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // callstack
sizeof( QueueHeader ) + sizeof( QueueZoneEnd ), sizeof( QueueHeader ) + sizeof( QueueZoneEnd ),
@ -536,9 +568,13 @@ static constexpr size_t QueueDataSize[] = {
sizeof( QueueHeader ) + sizeof( QueueLockRelease ), // shared sizeof( QueueHeader ) + sizeof( QueueLockRelease ), // shared
sizeof( QueueHeader ) + sizeof( QueueLockName ), sizeof( QueueHeader ) + sizeof( QueueLockName ),
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), sizeof( QueueHeader ) + sizeof( QueueMemAlloc ),
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // named
sizeof( QueueHeader ) + sizeof( QueueMemFree ), sizeof( QueueHeader ) + sizeof( QueueMemFree ),
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // named
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack, named
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack, named
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ),
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack
sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ), sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ),
@ -553,6 +589,7 @@ static constexpr size_t QueueDataSize[] = {
sizeof( QueueHeader ), // terminate sizeof( QueueHeader ), // terminate
sizeof( QueueHeader ), // keep alive sizeof( QueueHeader ), // keep alive
sizeof( QueueHeader ) + sizeof( QueueThreadContext ), sizeof( QueueHeader ) + sizeof( QueueThreadContext ),
sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ),
sizeof( QueueHeader ), // crash sizeof( QueueHeader ), // crash
sizeof( QueueHeader ) + sizeof( QueueCrashReport ), sizeof( QueueHeader ) + sizeof( QueueCrashReport ),
sizeof( QueueHeader ) + sizeof( QueueZoneValidation ), sizeof( QueueHeader ) + sizeof( QueueZoneValidation ),
@ -564,10 +601,10 @@ static constexpr size_t QueueDataSize[] = {
sizeof( QueueHeader ) + sizeof( QueueLockAnnounce ), sizeof( QueueHeader ) + sizeof( QueueLockAnnounce ),
sizeof( QueueHeader ) + sizeof( QueueLockTerminate ), sizeof( QueueHeader ) + sizeof( QueueLockTerminate ),
sizeof( QueueHeader ) + sizeof( QueueLockMark ), sizeof( QueueHeader ) + sizeof( QueueLockMark ),
sizeof( QueueHeader ) + sizeof( QueueMessage ), // literal sizeof( QueueHeader ) + sizeof( QueueMessageLiteral ),
sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // literal sizeof( QueueHeader ) + sizeof( QueueMessageColorLiteral ),
sizeof( QueueHeader ) + sizeof( QueueMessage ), // literal, callstack sizeof( QueueHeader ) + sizeof( QueueMessageLiteral ), // callstack
sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // literal, callstack sizeof( QueueHeader ) + sizeof( QueueMessageColorLiteral ), // callstack
sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ), sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ),
sizeof( QueueHeader ) + sizeof( QueueCallstackFrameSize ), sizeof( QueueHeader ) + sizeof( QueueCallstackFrameSize ),
sizeof( QueueHeader ) + sizeof( QueueCallstackFrame ), sizeof( QueueHeader ) + sizeof( QueueCallstackFrame ),
@ -579,10 +616,12 @@ static constexpr size_t QueueDataSize[] = {
sizeof( QueueHeader ) + sizeof( QueueParamSetup ), sizeof( QueueHeader ) + sizeof( QueueParamSetup ),
sizeof( QueueHeader ), // param pingback sizeof( QueueHeader ), // param pingback
sizeof( QueueHeader ) + sizeof( QueueCpuTopology ), sizeof( QueueHeader ) + sizeof( QueueCpuTopology ),
sizeof( QueueHeader ), // single string data
sizeof( QueueHeader ), // second string data
sizeof( QueueHeader ) + sizeof( QueueMemNamePayload ),
// keep all QueueStringTransfer below // keep all QueueStringTransfer below
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // string data sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // string data
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // thread name sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // thread name
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // custom string data
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // plot name sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // plot name
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // allocated source location payload sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // allocated source location payload
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // callstack payload sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // callstack payload

View File

@ -1,4 +1,5 @@
#include <assert.h> #include <assert.h>
#include <inttypes.h>
#include <new> #include <new>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@ -106,36 +107,39 @@ Socket::~Socket()
} }
} }
bool Socket::Connect( const char* addr, int port ) bool Socket::Connect( const char* addr, uint16_t port )
{ {
assert( !IsValid() ); assert( !IsValid() );
if( m_ptr ) if( m_ptr )
{ {
const auto c = connect( m_connSock, m_ptr->ai_addr, m_ptr->ai_addrlen ); const auto c = connect( m_connSock, m_ptr->ai_addr, m_ptr->ai_addrlen );
assert( c == -1 ); if( c == -1 )
#if defined _WIN32 || defined __CYGWIN__
const auto err = WSAGetLastError();
if( err == WSAEALREADY || err == WSAEINPROGRESS ) return false;
if( err != WSAEISCONN )
{ {
freeaddrinfo( m_res ); #if defined _WIN32
closesocket( m_connSock ); const auto err = WSAGetLastError();
m_ptr = nullptr; if( err == WSAEALREADY || err == WSAEINPROGRESS ) return false;
return false; if( err != WSAEISCONN )
} {
freeaddrinfo( m_res );
closesocket( m_connSock );
m_ptr = nullptr;
return false;
}
#else #else
if( errno == EALREADY || errno == EINPROGRESS ) return false; const auto err = errno;
if( errno != EISCONN ) if( err == EALREADY || err == EINPROGRESS ) return false;
{ if( err != EISCONN )
freeaddrinfo( m_res ); {
close( m_connSock ); freeaddrinfo( m_res );
m_ptr = nullptr; close( m_connSock );
return false; m_ptr = nullptr;
} return false;
}
#endif #endif
}
#if defined _WIN32 || defined __CYGWIN__ #if defined _WIN32
u_long nonblocking = 0; u_long nonblocking = 0;
ioctlsocket( m_connSock, FIONBIO, &nonblocking ); ioctlsocket( m_connSock, FIONBIO, &nonblocking );
#else #else
@ -156,7 +160,7 @@ bool Socket::Connect( const char* addr, int port )
hints.ai_socktype = SOCK_STREAM; hints.ai_socktype = SOCK_STREAM;
char portbuf[32]; char portbuf[32];
sprintf( portbuf, "%i", port ); sprintf( portbuf, "%" PRIu16, port );
if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false; if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false;
int sock = 0; int sock = 0;
@ -167,7 +171,7 @@ bool Socket::Connect( const char* addr, int port )
int val = 1; int val = 1;
setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) ); setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
#endif #endif
#if defined _WIN32 || defined __CYGWIN__ #if defined _WIN32
u_long nonblocking = 1; u_long nonblocking = 1;
ioctlsocket( sock, FIONBIO, &nonblocking ); ioctlsocket( sock, FIONBIO, &nonblocking );
#else #else
@ -180,7 +184,7 @@ bool Socket::Connect( const char* addr, int port )
} }
else else
{ {
#if defined _WIN32 || defined __CYGWIN__ #if defined _WIN32
const auto err = WSAGetLastError(); const auto err = WSAGetLastError();
if( err != WSAEWOULDBLOCK ) if( err != WSAEWOULDBLOCK )
{ {
@ -203,7 +207,7 @@ bool Socket::Connect( const char* addr, int port )
freeaddrinfo( res ); freeaddrinfo( res );
if( !ptr ) return false; if( !ptr ) return false;
#if defined _WIN32 || defined __CYGWIN__ #if defined _WIN32
u_long nonblocking = 0; u_long nonblocking = 0;
ioctlsocket( sock, FIONBIO, &nonblocking ); ioctlsocket( sock, FIONBIO, &nonblocking );
#else #else
@ -215,6 +219,48 @@ bool Socket::Connect( const char* addr, int port )
return true; return true;
} }
// Resolves `addr`:`port` and connects to the first candidate address that accepts the
// connection. The socket is left in its default mode (no FIONBIO/fcntl calls are made here).
// Returns true and stores the connected descriptor in m_sock on success; returns false when
// name resolution fails or no candidate could be connected.
// Must only be called while the socket is not valid and no Connect() attempt is pending
// (both are asserted).
bool Socket::ConnectBlocking( const char* addr, uint16_t port )
{
    assert( !IsValid() );
    assert( !m_ptr );

    // Any address family, stream sockets only.
    struct addrinfo hints;
    memset( &hints, 0, sizeof( hints ) );
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;

    char portbuf[32];
    sprintf( portbuf, "%" PRIu16, port );

    struct addrinfo* res;
    if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false;

    // Walk the resolved candidates until one of them connects.
    int sock = 0;
    struct addrinfo* ptr = res;
    while( ptr )
    {
        sock = socket( ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol );
        if( sock != -1 )
        {
#if defined __APPLE__
            int val = 1;
            setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
#endif
            if( connect( sock, ptr->ai_addr, ptr->ai_addrlen ) != -1 ) break;
            // This candidate refused the connection: release its descriptor and try the next.
#ifdef _WIN32
            closesocket( sock );
#else
            close( sock );
#endif
        }
        ptr = ptr->ai_next;
    }

    // Remember the outcome before the list that `ptr` points into is released.
    const bool connected = ptr != nullptr;
    freeaddrinfo( res );
    if( !connected ) return false;

    m_sock.store( sock, std::memory_order_relaxed );
    return true;
}
void Socket::Close() void Socket::Close()
{ {
const auto sock = m_sock.load( std::memory_order_relaxed ); const auto sock = m_sock.load( std::memory_order_relaxed );
@ -247,7 +293,7 @@ int Socket::GetSendBufSize()
{ {
const auto sock = m_sock.load( std::memory_order_relaxed ); const auto sock = m_sock.load( std::memory_order_relaxed );
int bufSize; int bufSize;
#if defined _WIN32 || defined __CYGWIN__ #if defined _WIN32
int sz = sizeof( bufSize ); int sz = sizeof( bufSize );
getsockopt( sock, SOL_SOCKET, SO_SNDBUF, (char*)&bufSize, &sz ); getsockopt( sock, SOL_SOCKET, SO_SNDBUF, (char*)&bufSize, &sz );
#else #else
@ -306,6 +352,24 @@ int Socket::Recv( void* _buf, int len, int timeout )
} }
} }
// Reads from the socket into `_buf` until `len` bytes have been stored or recv() returns 0.
// Returns the number of bytes stored (possibly fewer than `len`), or -1 as soon as
// recv() returns -1.
// Note: `timeout` is accepted but not referenced by this implementation.
int Socket::ReadUpTo( void* _buf, int len, int timeout )
{
    const auto sock = m_sock.load( std::memory_order_relaxed );
    char* cursor = (char*)_buf;
    int total = 0;

    for( int remaining = len; remaining > 0; )
    {
        const auto got = recv( sock, cursor, remaining, 0 );
        if( got == -1 ) return -1;
        if( got == 0 ) break;
        total += got;
        cursor += got;
        remaining -= got;
    }
    return total;
}
}
bool Socket::Read( void* buf, int len, int timeout ) bool Socket::Read( void* buf, int len, int timeout )
{ {
auto cbuf = (char*)buf; auto cbuf = (char*)buf;
@ -383,33 +447,45 @@ ListenSocket::~ListenSocket()
if( m_sock != -1 ) Close(); if( m_sock != -1 ) Close();
} }
bool ListenSocket::Listen( int port, int backlog ) static int addrinfo_and_socket_for_family( uint16_t port, int ai_family, struct addrinfo** res )
{
struct addrinfo hints;
memset( &hints, 0, sizeof( hints ) );
hints.ai_family = ai_family;
hints.ai_socktype = SOCK_STREAM;
#ifndef TRACY_ONLY_LOCALHOST
const char* onlyLocalhost = getenv( "TRACY_ONLY_LOCALHOST" );
if( !onlyLocalhost || onlyLocalhost[0] != '1' )
{
hints.ai_flags = AI_PASSIVE;
}
#endif
char portbuf[32];
sprintf( portbuf, "%" PRIu16, port );
if( getaddrinfo( nullptr, portbuf, &hints, res ) != 0 ) return -1;
int sock = socket( (*res)->ai_family, (*res)->ai_socktype, (*res)->ai_protocol );
if (sock == -1) freeaddrinfo( *res );
return sock;
}
bool ListenSocket::Listen( uint16_t port, int backlog )
{ {
assert( m_sock == -1 ); assert( m_sock == -1 );
struct addrinfo* res; struct addrinfo* res = nullptr;
struct addrinfo hints;
memset( &hints, 0, sizeof( hints ) ); #ifndef TRACY_ONLY_IPV4
hints.ai_family = AF_INET6; const char* onlyIPv4 = getenv( "TRACY_ONLY_IPV4" );
hints.ai_socktype = SOCK_STREAM; if( !onlyIPv4 || onlyIPv4[0] != '1' )
#ifndef TRACY_ONLY_LOCALHOST {
hints.ai_flags = AI_PASSIVE; m_sock = addrinfo_and_socket_for_family( port, AF_INET6, &res );
}
#endif #endif
char portbuf[32];
sprintf( portbuf, "%i", port );
if( getaddrinfo( nullptr, portbuf, &hints, &res ) != 0 ) return false;
m_sock = socket( res->ai_family, res->ai_socktype, res->ai_protocol );
if (m_sock == -1) if (m_sock == -1)
{ {
// IPV6 protocol may not be available/is disabled. Try to create a socket // IPV6 protocol may not be available/is disabled. Try to create a socket
// with the IPV4 protocol // with the IPV4 protocol
hints.ai_family = AF_INET; m_sock = addrinfo_and_socket_for_family( port, AF_INET, &res );
if( getaddrinfo( nullptr, portbuf, &hints, &res ) != 0 ) return false;
m_sock = socket( res->ai_family, res->ai_socktype, res->ai_protocol );
if( m_sock == -1 ) return false; if( m_sock == -1 ) return false;
} }
#if defined _WIN32 || defined __CYGWIN__ #if defined _WIN32 || defined __CYGWIN__
@ -483,7 +559,7 @@ UdpBroadcast::~UdpBroadcast()
if( m_sock != -1 ) Close(); if( m_sock != -1 ) Close();
} }
bool UdpBroadcast::Open( const char* addr, int port ) bool UdpBroadcast::Open( const char* addr, uint16_t port )
{ {
assert( m_sock == -1 ); assert( m_sock == -1 );
@ -495,7 +571,7 @@ bool UdpBroadcast::Open( const char* addr, int port )
hints.ai_socktype = SOCK_DGRAM; hints.ai_socktype = SOCK_DGRAM;
char portbuf[32]; char portbuf[32];
sprintf( portbuf, "%i", port ); sprintf( portbuf, "%" PRIu16, port );
if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false; if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false;
int sock = 0; int sock = 0;
@ -506,7 +582,7 @@ bool UdpBroadcast::Open( const char* addr, int port )
int val = 1; int val = 1;
setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) ); setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
#endif #endif
#if defined _WIN32 || defined __CYGWIN__ #if defined _WIN32
unsigned long broadcast = 1; unsigned long broadcast = 1;
if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, (const char*)&broadcast, sizeof( broadcast ) ) == -1 ) if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, (const char*)&broadcast, sizeof( broadcast ) ) == -1 )
#else #else
@ -541,7 +617,7 @@ void UdpBroadcast::Close()
m_sock = -1; m_sock = -1;
} }
int UdpBroadcast::Send( int port, const void* data, int len ) int UdpBroadcast::Send( uint16_t port, const void* data, int len )
{ {
assert( m_sock != -1 ); assert( m_sock != -1 );
struct sockaddr_in addr; struct sockaddr_in addr;
@ -563,8 +639,10 @@ IpAddress::~IpAddress()
void IpAddress::Set( const struct sockaddr& addr ) void IpAddress::Set( const struct sockaddr& addr )
{ {
#if __MINGW32__ #if defined _WIN32 && ( !defined NTDDI_WIN10 || NTDDI_VERSION < NTDDI_WIN10 )
auto ai = (struct sockaddr_in*)&addr; struct sockaddr_in tmp;
memcpy( &tmp, &addr, sizeof( tmp ) );
auto ai = &tmp;
#else #else
auto ai = (const struct sockaddr_in*)&addr; auto ai = (const struct sockaddr_in*)&addr;
#endif #endif
@ -585,7 +663,7 @@ UdpListen::~UdpListen()
if( m_sock != -1 ) Close(); if( m_sock != -1 ) Close();
} }
bool UdpListen::Listen( int port ) bool UdpListen::Listen( uint16_t port )
{ {
assert( m_sock == -1 ); assert( m_sock == -1 );
@ -596,14 +674,14 @@ bool UdpListen::Listen( int port )
int val = 1; int val = 1;
setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) ); setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
#endif #endif
#if defined _WIN32 || defined __CYGWIN__ #if defined _WIN32
unsigned long reuse = 1; unsigned long reuse = 1;
setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, (const char*)&reuse, sizeof( reuse ) ); setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, (const char*)&reuse, sizeof( reuse ) );
#else #else
int reuse = 1; int reuse = 1;
setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof( reuse ) ); setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof( reuse ) );
#endif #endif
#if defined _WIN32 || defined __CYGWIN__ #if defined _WIN32
unsigned long broadcast = 1; unsigned long broadcast = 1;
if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, (const char*)&broadcast, sizeof( broadcast ) ) == -1 ) if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, (const char*)&broadcast, sizeof( broadcast ) ) == -1 )
#else #else
@ -649,14 +727,14 @@ void UdpListen::Close()
m_sock = -1; m_sock = -1;
} }
const char* UdpListen::Read( size_t& len, IpAddress& addr ) const char* UdpListen::Read( size_t& len, IpAddress& addr, int timeout )
{ {
static char buf[2048]; static char buf[2048];
struct pollfd fd; struct pollfd fd;
fd.fd = (socket_t)m_sock; fd.fd = (socket_t)m_sock;
fd.events = POLLIN; fd.events = POLLIN;
if( poll( &fd, 1, 10 ) <= 0 ) return nullptr; if( poll( &fd, 1, timeout ) <= 0 ) return nullptr;
sockaddr sa; sockaddr sa;
socklen_t salen = sizeof( struct sockaddr ); socklen_t salen = sizeof( struct sockaddr );

View File

@ -23,12 +23,14 @@ public:
Socket( int sock ); Socket( int sock );
~Socket(); ~Socket();
bool Connect( const char* addr, int port ); bool Connect( const char* addr, uint16_t port );
bool ConnectBlocking( const char* addr, uint16_t port );
void Close(); void Close();
int Send( const void* buf, int len ); int Send( const void* buf, int len );
int GetSendBufSize(); int GetSendBufSize();
int ReadUpTo( void* buf, int len, int timeout );
bool Read( void* buf, int len, int timeout ); bool Read( void* buf, int len, int timeout );
template<typename ShouldExit> template<typename ShouldExit>
@ -74,7 +76,7 @@ public:
ListenSocket(); ListenSocket();
~ListenSocket(); ~ListenSocket();
bool Listen( int port, int backlog ); bool Listen( uint16_t port, int backlog );
Socket* Accept(); Socket* Accept();
void Close(); void Close();
@ -93,10 +95,10 @@ public:
UdpBroadcast(); UdpBroadcast();
~UdpBroadcast(); ~UdpBroadcast();
bool Open( const char* addr, int port ); bool Open( const char* addr, uint16_t port );
void Close(); void Close();
int Send( int port, const void* data, int len ); int Send( uint16_t port, const void* data, int len );
UdpBroadcast( const UdpBroadcast& ) = delete; UdpBroadcast( const UdpBroadcast& ) = delete;
UdpBroadcast( UdpBroadcast&& ) = delete; UdpBroadcast( UdpBroadcast&& ) = delete;
@ -134,10 +136,10 @@ public:
UdpListen(); UdpListen();
~UdpListen(); ~UdpListen();
bool Listen( int port ); bool Listen( uint16_t port );
void Close(); void Close();
const char* Read( size_t& len, IpAddress& addr ); const char* Read( size_t& len, IpAddress& addr, int timeout );
UdpListen( const UdpListen& ) = delete; UdpListen( const UdpListen& ) = delete;
UdpListen( UdpListen&& ) = delete; UdpListen( UdpListen&& ) = delete;

View File

@ -237,3 +237,13 @@ TRACY_API const char* GetThreadName( uint64_t id )
} }
} }
#ifdef __cplusplus
extern "C" {
#endif
// C-linkage wrapper that forwards `name` unchanged to tracy::SetThreadName.
TRACY_API void ___tracy_set_thread_name( const char* name )
{
    tracy::SetThreadName( name );
}
#ifdef __cplusplus
}
#endif

View File

@ -288,11 +288,10 @@ namespace contouring {
for (size_t i = 0; i < surrounding.size(); i++) { for (size_t i = 0; i < surrounding.size(); i++) {
auto &edits = surrounding[i]->getEdits(); auto &edits = surrounding[i]->getEdits();
auto offset = glm::ivec3(surrounding::g_corner_offsets[i]) * CHUNK_LENGTH; auto offset = glm::ivec3(surrounding::g_corner_offsets[i]) * CHUNK_LENGTH;
for (auto it = edits.end(); it != edits.begin();) { for (auto it = edits.begin(); it != edits.end(); ++it) {
it--; auto p = offset + glm::ivec3(glm::fromIdx(it->first));
auto p = offset + glm::ivec3(glm::fromIdx(it->idx));
if(p.x < SIZE && p.y < SIZE && p.z < SIZE) { if(p.x < SIZE && p.y < SIZE && p.z < SIZE) {
setCell(p.x, p.y, p.z, it->value); setCell(p.x, p.y, p.z, it->second.value);
} }
} }
} }

View File

@ -11,7 +11,7 @@
namespace tracy { namespace tracy {
class VkCtx; class VkCtx;
} }
typedef tracy::VkCtx* TracyVkCtx; typedef tracy::VkCtx* TracyVkCtxPtr;
namespace render::vk { namespace render::vk {
class SwapChain; class SwapChain;
@ -54,7 +54,7 @@ private:
VkQueue graphicsQueue; VkQueue graphicsQueue;
VkCommandPool graphicsPool; VkCommandPool graphicsPool;
std::vector<VkCommandBuffer> graphicsBuffers; std::vector<VkCommandBuffer> graphicsBuffers;
TracyVkCtx tracyCtx; TracyVkCtxPtr tracyCtx;
BufferGroup uniformBuffers; BufferGroup uniformBuffers;

View File

@ -16,7 +16,7 @@ public:
it->second -= deltaTime; it->second -= deltaTime;
if (it->second <= 0 && animate) { if (it->second <= 0 && animate) {
invalidate(it->first.idx); invalidate(it->first.idx);
edits.emplace_back(it->first); edits.emplace(it->first.idx, it->first);
it = futureEdits.erase(it); it = futureEdits.erase(it);
} else { } else {
it++; it++;

View File

@ -9,6 +9,7 @@
#include "../../core/net/io.hpp" #include "../../core/net/io.hpp"
#include "../../core/utils/logger.hpp" #include "../../core/utils/logger.hpp"
#include "Chunk.hpp" #include "Chunk.hpp"
#include <random>
using namespace world::client; using namespace world::client;
@ -29,6 +30,8 @@ void DistantUniverse::update(voxel_pos pos, float deltaTime) {
{ // Update alive areas { // Update alive areas
ZoneScopedN("World"); ZoneScopedN("World");
auto rng = std::mt19937(std::rand());
const auto contouringThreshold = rng.max() / (1 + contouring->getQueueSize());
for (auto& area: areas) { for (auto& area: areas) {
ZoneScopedN("Area"); ZoneScopedN("Area");
const bool chunkChangeArea = (false && area.second->move(glm::vec3(deltaTime))) || chunkChange; // TODO: area.velocity const bool chunkChangeArea = (false && area.second->move(glm::vec3(deltaTime))) || chunkChange; // TODO: area.velocity
@ -41,7 +44,7 @@ void DistantUniverse::update(voxel_pos pos, float deltaTime) {
if (glm::length2(diff - it_c->first) > glm::pow2(options.keepDistance)) { if (glm::length2(diff - it_c->first) > glm::pow2(options.keepDistance)) {
it_c = chunks.erase(it_c); it_c = chunks.erase(it_c);
} else { } else {
if(const auto neighbors = std::dynamic_pointer_cast<Chunk>(it_c->second)->update(deltaTime, true /*MAYBE: random update*/)) { if(const auto neighbors = std::dynamic_pointer_cast<Chunk>(it_c->second)->update(deltaTime, rng() < contouringThreshold)) {
contouring->onUpdate(std::make_pair(area.first, it_c->first), diff, chunks, neighbors.value()); contouring->onUpdate(std::make_pair(area.first, it_c->first), diff, chunks, neighbors.value());
} }
++it_c; ++it_c;
@ -290,24 +293,9 @@ bool DistantUniverse::onPacket(const data::out_view& buf, net::PacketFlags) {
if (!fill) if (!fill)
break; break;
if(const auto it = areas.find(fill->pos.first); it != areas.end()) { world::iterator::Apply<Chunk>(areas, *fill, [](std::shared_ptr<Chunk> &ck, chunk_pos, chunk_voxel_idx idx, Voxel, Voxel next, float delay) {
auto &chunks = it->second->setChunks(); ck->apply(Chunk::Edit{next, delay, idx});
auto iterator = world::iterator::Get(fill->shape, fill->radius); });
world::iterator::pair point;
while (iterator->next(point)) {
const voxel_pos offset = point.first;
const auto split = glm::splitIdx(fill->pos.second + offset);
if(chunks.inRange(split.first)) {
if(const auto chunk = it->second->setChunks().findInRange(split.first)) {
auto ck = std::dynamic_pointer_cast<Chunk>(chunk.value());
auto prev = ck->get(split.second);
const auto next = prev.filled(fill->val, point.second);
const auto delay = glm::length2(offset) / fill->radius * .05f;
ck->apply(Chunk::Edit{split.second, next, delay});
}
}
}
}
break; break;
} }
@ -395,27 +383,10 @@ void DistantUniverse::emit(const action::packet &action) {
peer.send(net::PacketWriter::Of(net::client_packet_type::FILL_SHAPE, *fill)); peer.send(net::PacketWriter::Of(net::client_packet_type::FILL_SHAPE, *fill));
if (options.editPrediction) { if (options.editPrediction) {
ZoneScopedN("Fill"); ZoneScopedN("Fill");
const auto keepDelay = 10 + (peer.getRTT() / 20000.f); // 10s + 50RTT const auto keepDelay = 5 + (peer.getRTT() / 20000.f); // 5s + 50RTT
if(const auto it = areas.find(fill->pos.first); it != areas.end()) { world::iterator::Apply<Chunk>(areas, *fill, [&](std::shared_ptr<Chunk> &ck, chunk_pos, chunk_voxel_idx idx, Voxel, Voxel next, float delay) {
auto &chunks = it->second->setChunks(); ck->addFutureEdit(Chunk::Edit{next, keepDelay - delay * 2, idx}, delay);
auto iterator = world::iterator::Get(fill->shape, fill->radius); });
world::iterator::pair point;
while (iterator->next(point)) {
const voxel_pos offset = point.first;
const auto split = glm::splitIdx(fill->pos.second + offset);
if(chunks.inRange(split.first)) {
if(const auto chunk = it->second->setChunks().findInRange(split.first)) {
auto ck = std::dynamic_pointer_cast<Chunk>(chunk.value());
auto prev = ck->get(split.second);
const auto next = prev.filled(fill->val, point.second);
if(prev.value != next.value) {
const auto delay = glm::length2(offset) / fill->radius * .05f;
ck->addFutureEdit(Chunk::Edit{split.second, next, keepDelay - delay * 2}, delay);
}
}
}
}
}
} }
} else { } else {
LOG_W("Bad action " << action.index()); LOG_W("Bad action " << action.index());

View File

@ -43,7 +43,9 @@ public:
void* data() { return buffer.writeTo(0); } void* data() { return buffer.writeTo(0); }
void reserve(size_t target) { void reserve(size_t target) {
if (target >= buffer.siz - buffer.cur) { if (target >= buffer.siz - buffer.cur) {
buffer.ptr = (uint8_t*)realloc(buffer.ptr, target + buffer.cur); const auto size = target + buffer.cur;
buffer.ptr = (uint8_t *)realloc(buffer.ptr, size);
buffer.siz = size;
} }
} }
void resize(size_t target) { void resize(size_t target) {

View File

@ -13,11 +13,13 @@ namespace world {
Chunk(std::istream& str, bool rle = RLE); Chunk(std::istream& str, bool rle = RLE);
virtual ~Chunk(); virtual ~Chunk();
struct Edit { struct EditBody {
chunk_voxel_idx idx;
Voxel value; Voxel value;
float delay; float delay;
}; };
struct Edit: EditBody {
chunk_voxel_idx idx;
};
/// Get voxel from index /// Get voxel from index
inline const Voxel& get(chunk_voxel_idx idx) const { inline const Voxel& get(chunk_voxel_idx idx) const {

View File

@ -12,9 +12,9 @@ EdittableChunk::~EdittableChunk() { }
std::optional<Faces> EdittableChunk::update(float deltaTime, bool animate) { std::optional<Faces> EdittableChunk::update(float deltaTime, bool animate) {
ZoneScopedN("Chunk"); ZoneScopedN("Chunk");
for(auto it = edits.begin(); it != edits.end();) { for(auto it = edits.begin(); it != edits.end();) {
it->delay -= deltaTime; it->second.delay -= deltaTime;
if(it->delay <= 0 && animate) { if(it->second.delay <= 0 && animate) {
invalidate(it->idx); invalidate(it->first);
it = edits.erase(it); it = edits.erase(it);
} else { } else {
it++; it++;
@ -42,8 +42,9 @@ void EdittableChunk::apply(const Edit& edit) {
const auto prev = voxels[edit.idx]; const auto prev = voxels[edit.idx];
if(prev.value != edit.value.value) { if(prev.value != edit.value.value) {
voxels[edit.idx] = edit.value; voxels[edit.idx] = edit.value;
edits.erase(edit.idx);
if(edit.delay > 0) { if(edit.delay > 0) {
edits.emplace_back<Edit>({edit.idx, prev, edit.delay}); edits.emplace(edit.idx, EditBody{prev, edit.delay});
} else { } else {
invalidate(edit.idx); invalidate(edit.idx);
} }

View File

@ -22,8 +22,9 @@ namespace world::client {
void apply(const Chunk::Edit &edit); void apply(const Chunk::Edit &edit);
using edits_t = robin_hood::unordered_map<chunk_voxel_idx, EditBody>;
/// Get pending changes /// Get pending changes
const std::vector<Chunk::Edit> &getEdits() const { return edits; } const edits_t &getEdits() const { return edits; }
static std::optional<chunk_voxel_idx> getNeighborIdx(chunk_voxel_idx idx, Face dir); static std::optional<chunk_voxel_idx> getNeighborIdx(chunk_voxel_idx idx, Face dir);
@ -31,8 +32,7 @@ namespace world::client {
EdittableChunk(); EdittableChunk();
/// Animated changes /// Animated changes
/// MAYBE: sort by delay edits_t edits;
std::vector<Chunk::Edit> edits;
/// Require update /// Require update
bool upToDate = true; bool upToDate = true;
/// Neighbors to update /// Neighbors to update

View File

@ -18,6 +18,34 @@ protected:
/// From -radius to radius /// From -radius to radius
std::unique_ptr<Abstract> Get(action::Shape, uint16_t radius); std::unique_ptr<Abstract> Get(action::Shape, uint16_t radius);
/// Walk every point of the shape described by `fill` and report the voxels it would change.
///
/// The area `fill.pos.first` is looked up in `areas`; if it is absent nothing happens.
/// Each point's voxel position is split into a chunk position and an in-chunk index.
/// The last chunk resolved is kept in `ck`/`ck_pos` so consecutive points in the same chunk
/// do not repeat the lookup. Points whose chunk is out of range, missing, or not convertible
/// to `Chunk` are skipped.
///
/// `callback(ck, ck_pos, idx, prev, next, delay)` is invoked only when `prev.value != next.value`,
/// where `next` is `prev.filled(fill.val, point.second)`.
template<typename Chunk, typename area_map, typename CB>
void Apply(area_map &areas, action::FillShape fill, const CB& callback) {
  const auto area = areas.find(fill.pos.first);
  if (area == areas.end())
    return;

  auto &chunks = area->second->setChunks();
  auto iterator = Get(fill.shape, fill.radius);
  pair point;
  std::shared_ptr<Chunk> ck = nullptr;
  // Sentinel meaning "no chunk resolved yet".
  chunk_pos ck_pos = chunk_pos(INT32_MAX);
  while (iterator->next(point)) {
    const voxel_pos offset = point.first;
    const auto split = glm::splitIdx(fill.pos.second + offset);
    if (split.first != ck_pos && chunks.inRange(split.first)) {
      if (auto found = chunks.find(split.first); found != chunks.end()) {
        ck = std::dynamic_pointer_cast<Chunk>(found->second);
        ck_pos = split.first;
      }
    }
    // `ck` is null when the cast above failed, or when a position equal to the sentinel is
    // seen before any chunk was resolved; never dereference it in those cases.
    if (ck && split.first == ck_pos) {
      auto prev = ck->get(split.second);
      const auto next = prev.filled(fill.val, point.second);
      if (prev.value != next.value) {
        // NOTE(review): divides by fill.radius — confirm callers never pass a radius of 0.
        callback(ck, ck_pos, split.second, prev, next, glm::length2(offset) / fill.radius * .05f);
      }
    }
  }
}
class Cube final: public Abstract { class Cube final: public Abstract {
public: public:
bool next(pair&) override; bool next(pair&) override;

View File

@ -58,6 +58,14 @@ public:
call(&peer); call(&peer);
} }
} }
template<typename P>
bool anyPeer(P predicate) {
for(auto& peer: peers) {
if(predicate(&peer))
return true;
}
return false;
}
private: private:
std::forward_list<Peer> peers; std::forward_list<Peer> peers;

View File

@ -16,8 +16,9 @@ public:
std::optional<Item> replace(chunk_voxel_idx idx, const Voxel &val, float delay = 0) override { std::optional<Item> replace(chunk_voxel_idx idx, const Voxel &val, float delay = 0) override {
const auto res = voxels[idx]; const auto res = voxels[idx];
set(idx, val); set(idx, val);
edits.erase(idx);
if(delay > 0) { if(delay > 0) {
edits.emplace_back<Edit>({idx, res, delay}); edits.emplace(idx, EditBody{res, delay});
} else { } else {
invalidate(idx); invalidate(idx);
} }

View File

@ -231,7 +231,7 @@ void Universe::pull() {
if (data == nullptr) if (data == nullptr)
return; return;
if (data->pendingEdits.empty() && peer->queueSize(net::server::queue::EDIT) == 0) { if (!data->pendingEdits.empty() && peer->queueSize(net::server::queue::EDIT) == 0) {
peer->send(net::PacketWriter::Of(net::server_packet_type::EDITS, data->pendingEdits.front())); peer->send(net::PacketWriter::Of(net::server_packet_type::EDITS, data->pendingEdits.front()));
data->pendingEdits.pop(); data->pendingEdits.pop();
} }
@ -620,8 +620,9 @@ bool Universe::onPacket(net::server::Peer *peer, const data::out_view &buf, net:
if (!packet.read(cpos)) if (!packet.read(cpos))
break; break;
const auto dist = glm::length2(areaOffset - cpos); const auto dist = glm::length2(areaOffset - cpos);
if (dist <= glm::pow2(loadDistance) && chunks.inRange(cpos) && chunks.findInRange(cpos).has_value()) { if (dist <= glm::pow2(loadDistance) && chunks.inRange(cpos)) {
data->pushChunk(std::make_pair(id, cpos), dist); if (chunks.findInRange(cpos).has_value())
data->pushChunk(std::make_pair(id, cpos), dist);
} else { } else {
LOG_T("Request out of range chunk"); LOG_T("Request out of range chunk");
} }
@ -692,59 +693,41 @@ bool Universe::isAreaFree(const area_<voxel_pos> &pos, const geometry::Shape sha
world::ItemList Universe::set(const area_<voxel_pos>& pos, int radius, action::Shape shape, const Voxel& val) { world::ItemList Universe::set(const area_<voxel_pos>& pos, int radius, action::Shape shape, const Voxel& val) {
ZoneScopedN("Fill"); ZoneScopedN("Fill");
ItemList list; ItemList list;
if(const auto it = areas.find(pos.first); it != areas.end()) { const bool stupidClient = host.anyPeer([&](net::server::Peer *peer) {
robin_hood::unordered_map<chunk_pos, std::vector<Chunk::Edit>> edits; auto data = peer->getCtx<net_client>();
auto &chunks = it->second->setChunks(); return data && !data->handleEdits;
auto iterator = world::iterator::Get(shape, radius); });
world::iterator::pair point; robin_hood::unordered_map<chunk_pos, std::vector<Chunk::Edit>> edits;
while (iterator->next(point)) { world::iterator::Apply<Chunk>(areas, world::action::FillShape(pos, val, shape, radius),
const voxel_pos offset = point.first; [&](std::shared_ptr<Chunk>& ck, chunk_pos ck_pos, chunk_voxel_idx idx, Voxel /*prev*/, Voxel next, float delay) {
const auto split = glm::splitIdx(pos.second + offset); if (stupidClient)
if(chunks.inRange(split.first)) { edits[ck_pos].push_back(Chunk::Edit{next, delay, idx});
if(const auto chunk = it->second->setChunks().findInRange(split.first)) { //TODO: apply break table
auto ck = std::dynamic_pointer_cast<Chunk>(chunk.value()); //TODO: inventory
auto prev = ck->get(split.second); ck->replace(idx, next, delay);
const auto next = prev.filled(val, point.second); });
if(prev.value != next.value) { if (stupidClient && !edits.empty()) {
//TODO: apply break table ZoneScopedN("Packet");
//TODO: inventory size_t size = sizeof(area_id);
const auto delay = glm::length2(offset) / radius * .05f; for(const auto& part: edits) {
edits[split.first].push_back(Chunk::Edit{split.second, next, delay}); size += sizeof(chunk_pos);
ck->replace(split.second, next, delay); size += sizeof(chunk_voxel_idx);
} size += sizeof(Chunk::Edit) * part.second.size();
}
}
} }
auto packet = net::PacketWriter(net::server_packet_type::EDITS, size);
bool stupidClient = false; packet.write(pos.first);
for(const auto& part: edits) {
packet.write(part.first);
packet.write<chunk_voxel_idx>(part.second.size());
packet.write(part.second.data(), part.second.size() * sizeof(Chunk::Edit));
}
auto buffer = packet.finish();
host.iterPeers([&](net::server::Peer *peer) { host.iterPeers([&](net::server::Peer *peer) {
//MAYBE: only in range
auto data = peer->getCtx<net_client>(); auto data = peer->getCtx<net_client>();
if (data && !data->handleEdits) if (data && !data->handleEdits)
stupidClient = true; peer->send(buffer, net::server::queue::CHUNK);
}); });
if (stupidClient) {
ZoneScopedN("Packet");
size_t size = sizeof(area_id);
for(const auto& part: edits) {
size += sizeof(chunk_pos);
size += sizeof(chunk_voxel_idx);
size += sizeof(Chunk::Edit) * part.second.size();
}
auto packet = net::PacketWriter(net::server_packet_type::EDITS, size);
packet.write(pos.first);
for(const auto& part: edits) {
packet.write(part.first);
packet.write<chunk_voxel_idx>(part.second.size());
packet.write(part.second.data(), part.second.size() * sizeof(Chunk::Edit));
}
auto buffer = packet.finish();
host.iterPeers([&](net::server::Peer *peer) {
//MAYBE: only in range
auto data = peer->getCtx<net_client>();
if (data && !data->handleEdits)
peer->send(buffer, net::server::queue::CHUNK);
});
}
} }
return list; return list;
} }