1
0
Fork 0

Update tracy to 0.7.3

windows
May B. 2020-11-07 22:31:17 +01:00
parent fa482b37f3
commit d7012d1554
36 changed files with 1728 additions and 668 deletions

3
deps/tracy/AUTHORS vendored
View File

@ -9,3 +9,6 @@ Dedmen Miller <dedmen@dedmen.de> (find zone bug fixes, improv
Michał Cichoń <michcic@gmail.com> (OSX call stack decoding backport)
Thales Sabino <thales@codeplay.com> (OpenCL support)
Andrew Depke <andrewdepke@gmail.com> (Direct3D 12 support)
Simonas Kazlauskas <git@kazlauskas.me> (OSX CI, external bindings)
Jakub Žádník <kubouch@gmail.com> (csvexport utility)
Andrey Voroshilov <andrew.voroshilov@gmail.com> (multi-DLL fixes)

110
deps/tracy/Tracy.hpp vendored
View File

@ -11,6 +11,9 @@
#define ZoneNamedC(x,y,z)
#define ZoneNamedNC(x,y,z,w)
#define ZoneTransient(x,y)
#define ZoneTransientN(x,y,z)
#define ZoneScoped
#define ZoneScopedN(x)
#define ZoneScopedC(x)
@ -50,12 +53,22 @@
#define TracyAlloc(x,y)
#define TracyFree(x)
#define TracySecureAlloc(x,y)
#define TracySecureFree(x)
#define TracyAllocN(x,y,z)
#define TracyFreeN(x,y)
#define TracySecureAllocN(x,y,z)
#define TracySecureFreeN(x,y)
#define ZoneNamedS(x,y,z)
#define ZoneNamedNS(x,y,z,w)
#define ZoneNamedCS(x,y,z,w)
#define ZoneNamedNCS(x,y,z,w,a)
#define ZoneTransientS(x,y,z)
#define ZoneTransientNS(x,y,z,w)
#define ZoneScopedS(x)
#define ZoneScopedNS(x,y)
#define ZoneScopedCS(x,y)
@ -63,6 +76,13 @@
#define TracyAllocS(x,y,z)
#define TracyFreeS(x,y)
#define TracySecureAllocS(x,y,z)
#define TracySecureFreeS(x,y)
#define TracyAllocNS(x,y,z,w)
#define TracyFreeNS(x,y,z)
#define TracySecureAllocNS(x,y,z,w)
#define TracySecureFreeNS(x,y,z)
#define TracyMessageS(x,y,z)
#define TracyMessageLS(x,y)
@ -71,23 +91,32 @@
#define TracyParameterRegister(x)
#define TracyParameterSetup(x,y,z,w)
#define TracyIsConnected false
#else
#include <string.h>
#include "client/TracyLock.hpp"
#include "client/TracyProfiler.hpp"
#include "client/TracyScoped.hpp"
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
// Scoped zones recorded with the default TRACY_CALLSTACK depth. Each macro declares a
// function-local static tracy::SourceLocationData (zone name or nullptr, __FUNCTION__,
// __FILE__, __LINE__, color or 0) whose identifier is made unique per line via TracyConcat,
// then constructs a tracy::ScopedZone called `varname` from its address.
// NOTE(review): every macro is listed twice below, first with `static const` and then with
// `static constexpr`; presumably the first four are the pre-update definitions kept by the
// diff view - confirm before treating this listing as one translation unit.
# define ZoneNamed( varname, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define ZoneNamedN( varname, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define ZoneNamedC( varname, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define ZoneNamedNC( varname, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), TRACY_CALLSTACK, active );
// Transient zones (TRACY_CALLSTACK variant): no static SourceLocationData is declared.
// The line number, the file and function strings together with their strlen() results,
// and the optional name with its length (nullptr/0 when unnamed) are passed straight to
// the tracy::ScopedZone constructor. strlen() is why <string.h> is included above.
# define ZoneTransient( varname, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, TRACY_CALLSTACK, active );
# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), TRACY_CALLSTACK, active );
#else
// Scoped zones without call stack capture: same static SourceLocationData declaration as
// the TRACY_CALLSTACK branch, but tracy::ScopedZone receives only the location and `active`.
// NOTE(review): each macro is listed twice below (`static const`, then `static constexpr`);
// presumably the first four are the pre-update definitions kept by the diff view - confirm.
# define ZoneNamed( varname, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
# define ZoneNamedN( varname, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
# define ZoneNamedC( varname, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
# define ZoneNamedNC( varname, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
# define ZoneNamed( varname, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
# define ZoneNamedN( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
# define ZoneNamedC( varname, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
# define ZoneNamedNC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), active );
// Transient zones without call stack capture: line, file/function strings with their
// strlen() results and the optional name (nullptr/0 when unnamed) go directly to the
// tracy::ScopedZone constructor; no static SourceLocationData is declared.
# define ZoneTransient( varname, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, active );
# define ZoneTransientN( varname, name, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), active );
#endif
#define ZoneScoped ZoneNamed( ___tracy_scoped_zone, true )
@ -109,13 +138,13 @@
#define FrameImage( image, width, height, offset, flip ) tracy::Profiler::SendFrameImage( image, width, height, offset, flip );
// Instrumented lock declarations. Each macro declares `varname` as tracy::Lockable<type>
// (or tracy::SharedLockable<type>) and initialises it from an immediately-invoked lambda
// that returns the address of a static SourceLocationData. The second field of that data
// is either the stringified "type varname" or the caller-supplied `desc`.
// NOTE(review): each macro is listed twice below (`static const`, then `static constexpr`);
// presumably the first four are the pre-update definitions kept by the diff view - confirm.
#define TracyLockable( type, varname ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define TracyLockableN( type, varname, desc ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define TracySharedLockable( type, varname ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static const tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define TracyLockable( type, varname ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define TracyLockableN( type, varname, desc ) tracy::Lockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define TracySharedLockable( type, varname ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, #type " " #varname, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define TracySharedLockableN( type, varname, desc ) tracy::SharedLockable<type> varname { [] () -> const tracy::SourceLocationData* { static constexpr tracy::SourceLocationData srcloc { nullptr, desc, __FILE__, __LINE__, 0 }; return &srcloc; }() };
#define LockableBase( type ) tracy::Lockable<type>
#define SharedLockableBase( type ) tracy::SharedLockable<type>
// Declares a static SourceLocationData named after `varname` for the current
// function/file/line and passes its address to varname.Mark().
// NOTE(review): listed twice (`static const`, then `static constexpr`); presumably the first
// line is the pre-update definition kept by the diff view - confirm.
#define LockMark( varname ) static const tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; varname.Mark( &__tracy_lock_location_##varname );
#define LockMark( varname ) static constexpr tracy::SourceLocationData __tracy_lock_location_##varname { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; varname.Mark( &__tracy_lock_location_##varname );
#define LockableName( varname, txt, size ) varname.CustomName( txt, size );
#define TracyPlot( name, val ) tracy::Profiler::PlotData( name, val );
@ -129,31 +158,55 @@
# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, TRACY_CALLSTACK );
# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, TRACY_CALLSTACK );
// Memory event macros that capture a call stack of depth TRACY_CALLSTACK.
// TracyAlloc/TracyFree pass `false` and TracySecureAlloc/TracySecureFree pass `true` as the
// trailing flag of MemAllocCallstack/MemFreeCallstack; the *N variants call the
// ...CallstackNamed functions and append `name`.
// NOTE(review): the first two definitions lack the trailing flag; presumably they are the
// pre-update forms kept by the diff view - confirm.
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK );
# define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK );
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, false );
# define TracyFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, false );
# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAllocCallstack( ptr, size, TRACY_CALLSTACK, true );
# define TracySecureFree( ptr ) tracy::Profiler::MemFreeCallstack( ptr, TRACY_CALLSTACK, true );
# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, false, name );
# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, false, name );
# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, TRACY_CALLSTACK, true, name );
# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, TRACY_CALLSTACK, true, name );
#else
# define TracyMessage( txt, size ) tracy::Profiler::Message( txt, size, 0 );
# define TracyMessageL( txt ) tracy::Profiler::Message( txt, 0 );
# define TracyMessageC( txt, size, color ) tracy::Profiler::MessageColor( txt, size, color, 0 );
# define TracyMessageLC( txt, color ) tracy::Profiler::MessageColor( txt, color, 0 );
// Memory event macros without call stack capture.
// TracyAlloc/TracyFree pass `false` and TracySecureAlloc/TracySecureFree pass `true` as the
// trailing flag of MemAlloc/MemFree; the *N variants call MemAllocNamed/MemFreeNamed and
// append `name`.
// NOTE(review): the first two definitions lack the trailing flag; presumably they are the
// pre-update forms kept by the diff view - confirm.
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size );
# define TracyFree( ptr ) tracy::Profiler::MemFree( ptr );
# define TracyAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, false );
# define TracyFree( ptr ) tracy::Profiler::MemFree( ptr, false );
# define TracySecureAlloc( ptr, size ) tracy::Profiler::MemAlloc( ptr, size, true );
# define TracySecureFree( ptr ) tracy::Profiler::MemFree( ptr, true );
# define TracyAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, false, name );
# define TracyFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, false, name );
# define TracySecureAllocN( ptr, size, name ) tracy::Profiler::MemAllocNamed( ptr, size, true, name );
# define TracySecureFreeN( ptr, name ) tracy::Profiler::MemFreeNamed( ptr, true, name );
#endif
#ifdef TRACY_HAS_CALLSTACK
// Scoped zones with a caller-chosen call stack `depth` (TRACY_HAS_CALLSTACK branch).
// Same static SourceLocationData declaration as ZoneNamed*, but `depth` is passed to
// tracy::ScopedZone in place of TRACY_CALLSTACK.
// NOTE(review): each macro is listed twice below (`static const`, then `static constexpr`);
// presumably the first four are the pre-update definitions kept by the diff view - confirm.
# define ZoneNamedS( varname, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneNamedNS( varname, name, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneNamedCS( varname, color, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneNamedNCS( varname, name, color, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneNamedS( varname, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneNamedNS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneNamedCS( varname, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define ZoneNamedNCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::ScopedZone varname( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
// Transient zones with an explicit call stack `depth`: location strings and their strlen()
// results are passed directly to tracy::ScopedZone, followed by `depth` and `active`.
# define ZoneTransientS( varname, depth, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), nullptr, 0, depth, active );
# define ZoneTransientNS( varname, name, depth, active ) tracy::ScopedZone varname( __LINE__, __FILE__, strlen( __FILE__ ), __FUNCTION__, strlen( __FUNCTION__ ), name, strlen( name ), depth, active );
// ZoneScoped*S shorthands: expand to the matching ZoneNamed*S macro with the fixed variable
// name ___tracy_scoped_zone and `active` set to true.
# define ZoneScopedS( depth ) ZoneNamedS( ___tracy_scoped_zone, depth, true )
# define ZoneScopedNS( name, depth ) ZoneNamedNS( ___tracy_scoped_zone, name, depth, true )
# define ZoneScopedCS( color, depth ) ZoneNamedCS( ___tracy_scoped_zone, color, depth, true )
# define ZoneScopedNCS( name, color, depth ) ZoneNamedNCS( ___tracy_scoped_zone, name, color, depth, true )
// Memory event macros with an explicit call stack `depth`.
// TracyAllocS/TracyFreeS pass `false` and TracySecureAllocS/TracySecureFreeS pass `true` as
// the trailing flag of MemAllocCallstack/MemFreeCallstack; the *NS variants call the
// ...CallstackNamed functions and append `name`.
// NOTE(review): the first two definitions lack the trailing flag; presumably they are the
// pre-update forms kept by the diff view - confirm.
# define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth );
# define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth );
# define TracyAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, false );
# define TracyFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, false );
# define TracySecureAllocS( ptr, size, depth ) tracy::Profiler::MemAllocCallstack( ptr, size, depth, true );
# define TracySecureFreeS( ptr, depth ) tracy::Profiler::MemFreeCallstack( ptr, depth, true );
# define TracyAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, false, name );
# define TracyFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, false, name );
# define TracySecureAllocNS( ptr, size, depth, name ) tracy::Profiler::MemAllocCallstackNamed( ptr, size, depth, true, name );
# define TracySecureFreeNS( ptr, depth, name ) tracy::Profiler::MemFreeCallstackNamed( ptr, depth, true, name );
# define TracyMessageS( txt, size, depth ) tracy::Profiler::Message( txt, size, depth );
# define TracyMessageLS( txt, depth ) tracy::Profiler::Message( txt, depth );
@ -165,6 +218,9 @@
// Depth-less fallbacks for the zone macros: the `depth` argument is discarded and the call
// is forwarded to the corresponding macro that takes no depth.
# define ZoneNamedCS( varname, color, depth, active ) ZoneNamedC( varname, color, active )
# define ZoneNamedNCS( varname, name, color, depth, active ) ZoneNamedNC( varname, name, color, active )
# define ZoneTransientS( varname, depth, active ) ZoneTransient( varname, active )
# define ZoneTransientNS( varname, name, depth, active ) ZoneTransientN( varname, name, active )
# define ZoneScopedS( depth ) ZoneScoped
# define ZoneScopedNS( name, depth ) ZoneScopedN( name )
# define ZoneScopedCS( color, depth ) ZoneScopedC( color )
@ -172,6 +228,13 @@
// Depth-less fallbacks for the unnamed memory macros: `depth` is discarded and the call is
// forwarded to the plain (secure or non-secure) alloc/free macro.
# define TracyAllocS( ptr, size, depth ) TracyAlloc( ptr, size )
# define TracyFreeS( ptr, depth ) TracyFree( ptr )
# define TracySecureAllocS( ptr, size, depth ) TracySecureAlloc( ptr, size )
# define TracySecureFreeS( ptr, depth ) TracySecureFree( ptr )
// Depth-less fallbacks for the named memory macros: `depth` is discarded and the call is
// forwarded to the matching named ("N") macro so that `name` is preserved.
// The unnamed TracyAlloc/TracyFree/TracySecureAlloc/TracySecureFree macros take no name
// parameter, so forwarding `name` to them (as was done previously) fails to preprocess.
# define TracyAllocNS( ptr, size, depth, name ) TracyAllocN( ptr, size, name )
# define TracyFreeNS( ptr, depth, name ) TracyFreeN( ptr, name )
# define TracySecureAllocNS( ptr, size, depth, name ) TracySecureAllocN( ptr, size, name )
# define TracySecureFreeNS( ptr, depth, name ) TracySecureFreeN( ptr, name )
# define TracyMessageS( txt, size, depth ) TracyMessage( txt, size )
# define TracyMessageLS( txt, depth ) TracyMessageL( txt )
@ -181,6 +244,7 @@
#define TracyParameterRegister( cb ) tracy::Profiler::ParameterRegister( cb );
#define TracyParameterSetup( idx, name, isBool, val ) tracy::Profiler::ParameterSetup( idx, name, isBool, val );
#define TracyIsConnected tracy::GetProfiler().IsConnected()
#endif

42
deps/tracy/TracyC.h vendored
View File

@ -11,6 +11,11 @@
extern "C" {
#endif
TRACY_API void ___tracy_set_thread_name( const char* name );
#define TracyCSetThreadName( name ) ___tracy_set_thread_name( name );
#ifndef TRACY_ENABLE
typedef const void* TracyCZoneCtx;
@ -26,6 +31,8 @@ typedef const void* TracyCZoneCtx;
#define TracyCAlloc(x,y)
#define TracyCFree(x)
#define TracyCSecureAlloc(x,y)
#define TracyCSecureFree(x)
#define TracyCFrameMark
#define TracyCFrameMarkNamed(x)
@ -47,6 +54,8 @@ typedef const void* TracyCZoneCtx;
#define TracyCAllocS(x,y,z)
#define TracyCFreeS(x,y)
#define TracyCSecureAllocS(x,y,z)
#define TracyCSecureFreeS(x,y)
#define TracyCMessageS(x,y,z)
#define TracyCMessageLS(x,y)
@ -81,8 +90,9 @@ struct ___tracy_c_zone_context
// This struct, as visible to user, is immutable, so treat it as if const was declared here.
typedef /*const*/ struct ___tracy_c_zone_context TracyCZoneCtx;
// NOTE(review): ___tracy_alloc_srcloc and ___tracy_alloc_srcloc_name are each declared twice
// below with different parameter lists. Presumably the first pair is the pre-update
// signature kept by the diff view and the pair taking explicit sourceSz/functionSz lengths
// is the current one - confirm. In C the two forms cannot coexist in one header.
TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, const char* function );
TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz );
TRACY_API void ___tracy_init_thread(void);
TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz );
TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz );
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin( const struct ___tracy_source_location_data* srcloc, int active );
TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___tracy_source_location_data* srcloc, int depth, int active );
@ -112,10 +122,10 @@ TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value );
#define TracyCZoneValue( ctx, value ) ___tracy_emit_zone_value( ctx, value );
// C entry points for memory events.
// NOTE(review): each function is declared twice below; presumably the first four are the
// pre-update signatures kept by the diff view - confirm.
// In the second set, `secure` is the flag the TracyCAlloc/TracyCFree macros pass as 0 and
// the TracyCSecureAlloc/TracyCSecureFree macros pass as 1.
TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size );
TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth );
TRACY_API void ___tracy_emit_memory_free( const void* ptr );
TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth );
TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure );
TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure );
TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure );
TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure );
TRACY_API void ___tracy_emit_message( const char* txt, size_t size, int callstack );
TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack );
@ -123,16 +133,20 @@ TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t co
TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack );
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
// C memory and message macros that capture a call stack of depth TRACY_CALLSTACK.
// The trailing 0/1 argument of the memory macros selects the non-secure/secure variant.
// NOTE(review): the first two definitions lack that argument; presumably they are the
// pre-update forms kept by the diff view - confirm.
// NOTE(review): unlike the message macros, the memory macros in this branch do not end in
// `;`, so the caller must supply it.
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK )
# define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK )
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 0 )
# define TracyCFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 0 )
# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc_callstack( ptr, size, TRACY_CALLSTACK, 1 )
# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free_callstack( ptr, TRACY_CALLSTACK, 1 )
# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, TRACY_CALLSTACK );
# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, TRACY_CALLSTACK );
# define TracyCMessageC( txt, size, color ) ___tracy_emit_messageC( txt, size, color, TRACY_CALLSTACK );
# define TracyCMessageLC( txt, color ) ___tracy_emit_messageLC( txt, color, TRACY_CALLSTACK );
#else
// C memory and message macros without call stack capture (callstack argument fixed to 0
// for messages). The trailing 0/1 argument of the memory macros selects the
// non-secure/secure variant.
// NOTE(review): the first two definitions lack that argument; presumably they are the
// pre-update forms kept by the diff view - confirm.
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size );
# define TracyCFree( ptr ) ___tracy_emit_memory_free( ptr );
# define TracyCAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 0 );
# define TracyCFree( ptr ) ___tracy_emit_memory_free( ptr, 0 );
# define TracyCSecureAlloc( ptr, size ) ___tracy_emit_memory_alloc( ptr, size, 1 );
# define TracyCSecureFree( ptr ) ___tracy_emit_memory_free( ptr, 1 );
# define TracyCMessage( txt, size ) ___tracy_emit_message( txt, size, 0 );
# define TracyCMessageL( txt ) ___tracy_emit_messageL( txt, 0 );
@ -166,8 +180,10 @@ TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size );
# define TracyCZoneCS( ctx, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { NULL, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
# define TracyCZoneNCS( ctx, name, color, depth, active ) static const struct ___tracy_source_location_data TracyConcat(__tracy_source_location,__LINE__) = { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; TracyCZoneCtx ctx = ___tracy_emit_zone_begin_callstack( &TracyConcat(__tracy_source_location,__LINE__), depth, active );
// C memory and message macros with an explicit call stack `depth`. The trailing 0/1
// argument of the memory macros selects the non-secure/secure variant.
// NOTE(review): the first two definitions lack that argument; presumably they are the
// pre-update forms kept by the diff view - confirm.
# define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth )
# define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth )
# define TracyCAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 0 )
# define TracyCFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 0 )
# define TracyCSecureAllocS( ptr, size, depth ) ___tracy_emit_memory_alloc_callstack( ptr, size, depth, 1 )
# define TracyCSecureFreeS( ptr, depth ) ___tracy_emit_memory_free_callstack( ptr, depth, 1 )
# define TracyCMessageS( txt, size, depth ) ___tracy_emit_message( txt, size, depth );
# define TracyCMessageLS( txt, depth ) ___tracy_emit_messageL( txt, depth );
@ -181,6 +197,8 @@ TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size );
// Depth-less fallbacks for the C macros: `depth` is discarded and the call is forwarded to
// the corresponding macro that takes no depth.
# define TracyCAllocS( ptr, size, depth ) TracyCAlloc( ptr, size )
# define TracyCFreeS( ptr, depth ) TracyCFree( ptr )
# define TracyCSecureAllocS( ptr, size, depth ) TracyCSecureAlloc( ptr, size )
# define TracyCSecureFreeS( ptr, depth ) TracyCSecureFree( ptr )
# define TracyCMessageS( txt, size, depth ) TracyCMessage( txt, size )
# define TracyCMessageLS( txt, depth ) TracyCMessageL( txt )

View File

@ -15,6 +15,10 @@
#ifdef TRACY_ENABLE
// On MSVC, save the current warning state and drop to warning level 0 while the bundled
// sources below are included; the state is restored by the matching warning(pop) near the
// end of this file.
#ifdef _MSC_VER
# pragma warning(push, 0)
#endif
#include "common/tracy_lz4.cpp"
#include "client/TracyProfiler.cpp"
#include "client/TracyCallstack.cpp"
@ -42,6 +46,7 @@
// On MSVC, request linking against ws2_32.lib and dbghelp.lib from this translation unit,
// then restore the warning state saved by the earlier warning(push, 0).
#ifdef _MSC_VER
# pragma comment(lib, "ws2_32.lib")
# pragma comment(lib, "dbghelp.lib")
# pragma warning(pop)
#endif
#endif

View File

@ -52,21 +52,21 @@ public:
#define TracyGpuContext tracy::InitRPMallocThread(); tracy::GetGpuCtx().ptr = (tracy::GpuCtx*)tracy::tracy_malloc( sizeof( tracy::GpuCtx ) ); new(tracy::GetGpuCtx().ptr) tracy::GpuCtx;
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
// GPU zones recorded with the default TRACY_CALLSTACK depth. The named variants declare a
// static SourceLocationData unique per line and construct a tracy::GpuCtxScope from it;
// TracyGpuZone/TracyGpuZoneC forward to the *S variants with TRACY_CALLSTACK as the depth
// and the fixed variable name ___tracy_gpu_zone.
// NOTE(review): the named macros are listed twice (`static const`, then `static constexpr`);
// presumably the first two are the pre-update definitions kept by the diff view - confirm.
# define TracyGpuNamedZone( varname, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyGpuNamedZoneC( varname, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), TRACY_CALLSTACK, active );
# define TracyGpuZone( name ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, TRACY_CALLSTACK, true )
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, TRACY_CALLSTACK, true )
#else
// GPU zones without call stack capture: tracy::GpuCtxScope receives only the static source
// location and `active`. TracyGpuZone/TracyGpuZoneC use the fixed variable name
// ___tracy_gpu_zone with `active` set to true.
// NOTE(review): the named macros are listed twice (`static const`, then `static constexpr`);
// presumably the first two are the pre-update definitions kept by the diff view - confirm.
# define TracyGpuNamedZone( varname, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
# define TracyGpuNamedZoneC( varname, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
# define TracyGpuNamedZone( varname, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
# define TracyGpuNamedZoneC( varname, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), active );
# define TracyGpuZone( name ) TracyGpuNamedZone( ___tracy_gpu_zone, name, true )
# define TracyGpuZoneC( name, color ) TracyGpuNamedZoneC( ___tracy_gpu_zone, name, color, true )
#endif
#define TracyGpuCollect tracy::GetGpuCtx().ptr->Collect();
#ifdef TRACY_HAS_CALLSTACK
// GPU zones with a caller-chosen call stack `depth` (TRACY_HAS_CALLSTACK branch): the
// static source location, `depth` and `active` are passed to tracy::GpuCtxScope.
// TracyGpuZoneS/TracyGpuZoneCS use the fixed variable name ___tracy_gpu_zone with `active`
// set to true.
// NOTE(review): the named macros are listed twice (`static const`, then `static constexpr`);
// presumably the first two are the pre-update definitions kept by the diff view - confirm.
# define TracyGpuNamedZoneS( varname, name, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
# define TracyGpuNamedZoneS( varname, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
# define TracyGpuNamedZoneCS( varname, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::GpuCtxScope varname( &TracyConcat(__tracy_gpu_source_location,__LINE__), depth, active );
# define TracyGpuZoneS( name, depth ) TracyGpuNamedZoneS( ___tracy_gpu_zone, name, depth, true )
# define TracyGpuZoneCS( name, color, depth ) TracyGpuNamedZoneCS( ___tracy_gpu_zone, name, color, depth, true )
#else
@ -110,7 +110,7 @@ public:
MemWrite( &item->gpuNewContext.thread, thread );
MemWrite( &item->gpuNewContext.period, period );
MemWrite( &item->gpuNewContext.context, m_context );
MemWrite( &item->gpuNewContext.accuracyBits, (uint8_t)bits );
MemWrite( &item->gpuNewContext.flags, uint8_t( 0 ) );
MemWrite( &item->gpuNewContext.type, GpuContextType::OpenGl );
#ifdef TRACY_ON_DEMAND
@ -215,6 +215,8 @@ public:
const auto queryId = GetGpuCtx().ptr->NextQueryId();
glQueryCounter( GetGpuCtx().ptr->TranslateOpenGlQueryId( queryId ), GL_TIMESTAMP );
GetProfiler().SendCallstack( depth );
const auto thread = GetThreadHandle();
TracyLfqPrepare( QueueType::GpuZoneBeginCallstack );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
@ -223,8 +225,6 @@ public:
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, GetGpuCtx().ptr->GetId() );
TracyLfqCommit;
GetProfiler().SendCallstack( depth );
}
tracy_force_inline ~GpuCtxScope()

View File

@ -4,6 +4,7 @@
#if !defined TRACY_ENABLE
#define TracyVkContext(x,y,z,w) nullptr
#define TracyVkContextCalibrated(x,y,z,w,a,b) nullptr
#define TracyVkDestroy(x)
#define TracyVkNamedZone(c,x,y,z,w)
#define TracyVkNamedZoneC(c,x,y,z,w,a)
@ -19,10 +20,9 @@
namespace tracy
{
class VkCtxScope {};
class VkCtx;
}
using TracyVkCtx = tracy::VkCtx*;
using TracyVkCtx = void*;
#else
@ -43,16 +43,36 @@ class VkCtx
enum { QueryCount = 64 * 1024 };
public:
VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf )
VkCtx( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT, PFN_vkGetCalibratedTimestampsEXT _vkGetCalibratedTimestampsEXT )
: m_device( device )
, m_timeDomain( VK_TIME_DOMAIN_DEVICE_EXT )
, m_context( GetGpuCtxCounter().fetch_add( 1, std::memory_order_relaxed ) )
, m_head( 0 )
, m_tail( 0 )
, m_oldCnt( 0 )
, m_queryCount( QueryCount )
, m_vkGetCalibratedTimestampsEXT( _vkGetCalibratedTimestampsEXT )
{
assert( m_context != 255 );
if( _vkGetPhysicalDeviceCalibrateableTimeDomainsEXT && _vkGetCalibratedTimestampsEXT )
{
uint32_t num;
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, nullptr );
if( num > 4 ) num = 4;
VkTimeDomainEXT data[4];
_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT( physdev, &num, data );
for( uint32_t i=0; i<num; i++ )
{
// TODO VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT
if( data[i] == VK_TIME_DOMAIN_QUERY_PERFORMANCE_COUNTER_EXT )
{
m_timeDomain = data[i];
break;
}
}
}
VkPhysicalDeviceProperties prop;
vkGetPhysicalDeviceProperties( physdev, &prop );
const float period = prop.limits.timestampPeriod;
@ -82,21 +102,56 @@ public:
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue );
vkBeginCommandBuffer( cmdbuf, &beginInfo );
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 );
vkEndCommandBuffer( cmdbuf );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue );
int64_t tcpu, tgpu;
if( m_timeDomain == VK_TIME_DOMAIN_DEVICE_EXT )
{
vkBeginCommandBuffer( cmdbuf, &beginInfo );
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, m_query, 0 );
vkEndCommandBuffer( cmdbuf );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue );
int64_t tcpu = Profiler::GetTime();
int64_t tgpu;
vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT );
tcpu = Profiler::GetTime();
vkGetQueryPoolResults( device, m_query, 0, 1, sizeof( tgpu ), &tgpu, sizeof( tgpu ), VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT );
vkBeginCommandBuffer( cmdbuf, &beginInfo );
vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 );
vkEndCommandBuffer( cmdbuf );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue );
vkBeginCommandBuffer( cmdbuf, &beginInfo );
vkCmdResetQueryPool( cmdbuf, m_query, 0, 1 );
vkEndCommandBuffer( cmdbuf );
vkQueueSubmit( queue, 1, &submitInfo, VK_NULL_HANDLE );
vkQueueWaitIdle( queue );
}
else
{
enum { NumProbes = 32 };
VkCalibratedTimestampInfoEXT spec[2] = {
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
};
uint64_t ts[2];
uint64_t deviation[NumProbes];
for( int i=0; i<NumProbes; i++ )
{
_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, deviation+i );
}
uint64_t minDeviation = deviation[0];
for( int i=1; i<NumProbes; i++ )
{
if( minDeviation > deviation[i] )
{
minDeviation = deviation[i];
}
}
m_deviation = minDeviation * 3 / 2;
m_qpcToNs = int64_t( 1000000000. / GetFrequencyQpc() );
Calibrate( device, m_prevCalibration, tgpu );
tcpu = Profiler::GetTime();
}
uint8_t flags = 0;
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) flags |= GpuContextCalibration;
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuNewContext );
@ -105,7 +160,7 @@ public:
memset( &item->gpuNewContext.thread, 0, sizeof( item->gpuNewContext.thread ) );
MemWrite( &item->gpuNewContext.period, period );
MemWrite( &item->gpuNewContext.context, m_context );
MemWrite( &item->gpuNewContext.accuracyBits, uint8_t( 0 ) );
MemWrite( &item->gpuNewContext.flags, flags );
MemWrite( &item->gpuNewContext.type, GpuContextType::Vulkan );
#ifdef TRACY_ON_DEMAND
@ -133,6 +188,8 @@ public:
{
vkCmdResetQueryPool( cmdbuf, m_query, 0, m_queryCount );
m_head = m_tail = 0;
int64_t tgpu;
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT ) Calibrate( m_device, m_prevCalibration, tgpu );
return;
}
#endif
@ -164,6 +221,25 @@ public:
Profiler::QueueSerialFinish();
}
if( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT )
{
int64_t tgpu, tcpu;
Calibrate( m_device, tcpu, tgpu );
const auto refCpu = Profiler::GetTime();
const auto delta = tcpu - m_prevCalibration;
if( delta > 0 )
{
m_prevCalibration = tcpu;
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuCalibration );
MemWrite( &item->gpuCalibration.gpuTime, tgpu );
MemWrite( &item->gpuCalibration.cpuTime, refCpu );
MemWrite( &item->gpuCalibration.cpuDelta, delta );
MemWrite( &item->gpuCalibration.context, m_context );
Profiler::QueueSerialFinish();
}
}
vkCmdResetQueryPool( cmdbuf, m_query, m_tail, cnt );
m_tail += cnt;
@ -184,8 +260,35 @@ private:
return m_context;
}
// Takes a paired GPU/CPU clock sample through the calibrated-timestamps entry
// point stored in m_vkGetCalibratedTimestampsEXT.
//   device - Vulkan device handle passed to the timestamp query.
//   tCpu   - receives ts[1] (the m_timeDomain sample) multiplied by m_qpcToNs.
//   tGpu   - receives ts[0] (the VK_TIME_DOMAIN_DEVICE_EXT sample).
// Must only be called when m_timeDomain is not VK_TIME_DOMAIN_DEVICE_EXT (asserted).
tracy_force_inline void Calibrate( VkDevice device, int64_t& tCpu, int64_t& tGpu )
{
assert( m_timeDomain != VK_TIME_DOMAIN_DEVICE_EXT );
// spec[0] requests the device time domain, spec[1] the domain selected for this context.
VkCalibratedTimestampInfoEXT spec[2] = {
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, VK_TIME_DOMAIN_DEVICE_EXT },
{ VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT, nullptr, m_timeDomain },
};
uint64_t ts[2];
uint64_t deviation;
// Repeat the query until the deviation written by the call is no larger than m_deviation.
// NOTE(review): there is no retry limit; the loop only ends once that condition holds.
do
{
m_vkGetCalibratedTimestampsEXT( device, 2, spec, ts, &deviation );
}
while( deviation > m_deviation );
#if defined _WIN32 || defined __CYGWIN__
tGpu = ts[0];
tCpu = ts[1] * m_qpcToNs;
#else
// No conversion is implemented on other platforms: tCpu and tGpu are not assigned in this branch.
assert( false );
#endif
}
VkDevice m_device;
VkQueryPool m_query;
VkTimeDomainEXT m_timeDomain;
uint64_t m_deviation;
int64_t m_qpcToNs;
int64_t m_prevCalibration;
uint8_t m_context;
unsigned int m_head;
@ -194,6 +297,8 @@ private:
unsigned int m_queryCount;
int64_t* m_res;
PFN_vkGetCalibratedTimestampsEXT m_vkGetCalibratedTimestampsEXT;
};
class VkCtxScope
@ -237,6 +342,8 @@ public:
const auto queryId = ctx->NextQueryId();
vkCmdWriteTimestamp( cmdbuf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, ctx->m_query, queryId );
GetProfiler().SendCallstack( depth );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::GpuZoneBeginCallstackSerial );
MemWrite( &item->gpuZoneBegin.cpuTime, Profiler::GetTime() );
@ -245,8 +352,6 @@ public:
MemWrite( &item->gpuZoneBegin.queryId, uint16_t( queryId ) );
MemWrite( &item->gpuZoneBegin.context, ctx->GetId() );
Profiler::QueueSerialFinish();
GetProfiler().SendCallstack( depth );
}
tracy_force_inline ~VkCtxScope()
@ -272,11 +377,11 @@ private:
VkCtx* m_ctx;
};
static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf )
static inline VkCtx* CreateVkContext( VkPhysicalDevice physdev, VkDevice device, VkQueue queue, VkCommandBuffer cmdbuf, PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsEXT gpdctd, PFN_vkGetCalibratedTimestampsEXT gct )
{
InitRPMallocThread();
auto ctx = (VkCtx*)tracy_malloc( sizeof( VkCtx ) );
new(ctx) VkCtx( physdev, device, queue, cmdbuf );
new(ctx) VkCtx( physdev, device, queue, cmdbuf, gpdctd, gct );
return ctx;
}
@ -290,24 +395,25 @@ static inline void DestroyVkContext( VkCtx* ctx )
using TracyVkCtx = tracy::VkCtx*;
#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf );
#define TracyVkContext( physdev, device, queue, cmdbuf ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, nullptr, nullptr );
#define TracyVkContextCalibrated( physdev, device, queue, cmdbuf, gpdctd, gct ) tracy::CreateVkContext( physdev, device, queue, cmdbuf, gpdctd, gct );
#define TracyVkDestroy( ctx ) tracy::DestroyVkContext( ctx );
#if defined TRACY_HAS_CALLSTACK && defined TRACY_CALLSTACK
# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active );
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active );
# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active );
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, TRACY_CALLSTACK, active );
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, TRACY_CALLSTACK, true )
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, TRACY_CALLSTACK, true )
#else
# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, active );
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, active );
# define TracyVkNamedZone( ctx, varname, cmdbuf, name, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, active );
# define TracyVkNamedZoneC( ctx, varname, cmdbuf, name, color, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, active );
# define TracyVkZone( ctx, cmdbuf, name ) TracyVkNamedZone( ctx, ___tracy_gpu_zone, cmdbuf, name, true )
# define TracyVkZoneC( ctx, cmdbuf, name, color ) TracyVkNamedZoneC( ctx, ___tracy_gpu_zone, cmdbuf, name, color, true )
#endif
#define TracyVkCollect( ctx, cmdbuf ) ctx->Collect( cmdbuf );
#ifdef TRACY_HAS_CALLSTACK
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth, active );
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) static const tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth, active );
# define TracyVkNamedZoneS( ctx, varname, cmdbuf, name, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth, active );
# define TracyVkNamedZoneCS( ctx, varname, cmdbuf, name, color, depth, active ) static constexpr tracy::SourceLocationData TracyConcat(__tracy_gpu_source_location,__LINE__) { name, __FUNCTION__, __FILE__, (uint32_t)__LINE__, color }; tracy::VkCtxScope varname( ctx, &TracyConcat(__tracy_gpu_source_location,__LINE__), cmdbuf, depth, active );
# define TracyVkZoneS( ctx, cmdbuf, name, depth ) TracyVkNamedZoneS( ctx, ___tracy_gpu_zone, cmdbuf, name, depth, true )
# define TracyVkZoneCS( ctx, cmdbuf, name, color, depth ) TracyVkNamedZoneCS( ctx, ___tracy_gpu_zone, cmdbuf, name, color, depth, true )
#else

View File

@ -1,10 +1,8 @@
#ifdef _MSC_VER
# pragma warning(disable:4996)
#endif
namespace tracy
{
#if defined __linux__ && defined __ARM_ARCH
static const char* DecodeArmImplementer( uint32_t v )
{
static char buf[16];
@ -16,6 +14,7 @@ static const char* DecodeArmImplementer( uint32_t v )
case 0x44: return "DEC";
case 0x46: return "Fujitsu";
case 0x48: return "HiSilicon";
case 0x49: return "Infineon";
case 0x4d: return "Motorola";
case 0x4e: return "Nvidia";
case 0x50: return "Applied Micro";
@ -27,6 +26,7 @@ static const char* DecodeArmImplementer( uint32_t v )
case 0x66: return "Faraday";
case 0x68: return "HXT";
case 0x69: return "Intel";
case 0xc0: return "Ampere Computing";
default: break;
}
sprintf( buf, "0x%x", v );
@ -75,6 +75,7 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
case 0xc60: return " Cortex-M0+";
case 0xd00: return " AArch64 simulator";
case 0xd01: return " Cortex-A32";
case 0xd02: return " Cortex-A34";
case 0xd03: return " Cortex-A53";
case 0xd04: return " Cortex-A35";
case 0xd05: return " Cortex-A55";
@ -91,6 +92,10 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
case 0xd13: return " Cortex-R52";
case 0xd20: return " Cortex-M23";
case 0xd21: return " Cortex-M33";
case 0xd40: return " Zeus";
case 0xd41: return " Cortex-A78";
case 0xd43: return " Cortex-A65AE";
case 0xd44: return " Cortex-X1";
case 0xd4a: return " Neoverse E1";
default: break;
}
@ -110,6 +115,13 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
case 0xa2: return " ThunderX 81XX";
case 0xa3: return " ThunderX 83XX";
case 0xaf: return " ThunderX2 99xx";
case 0xb0: return " OcteonTX2";
case 0xb1: return " OcteonTX2 T98";
case 0xb2: return " OcteonTX2 T96";
case 0xb3: return " OcteonTX2 F95";
case 0xb4: return " OcteonTX2 F95N";
case 0xb5: return " OcteonTX2 F95MM";
case 0xb8: return " ThunderX3 T110";
default: break;
}
case 0x44:
@ -212,6 +224,8 @@ static const char* DecodeArmPart( uint32_t impl, uint32_t part )
return buf;
}
#elif defined __APPLE__ && TARGET_OS_IPHONE == 1
static const char* DecodeIosDevice( const char* id )
{
static const char* DeviceTable[] = {
@ -252,6 +266,7 @@ static const char* DecodeIosDevice( const char* id )
"iPhone12,1", "iPhone 11",
"iPhone12,3", "iPhone 11 Pro",
"iPhone12,5", "iPhone 11 Pro Max",
"iPhone12,8", "iPhone SE 2nd Gen",
"iPad1,1", "iPad (A1219/A1337)",
"iPad2,1", "iPad 2 (A1395)",
"iPad2,2", "iPad 2 (A1396)",
@ -302,6 +317,10 @@ static const char* DecodeIosDevice( const char* id )
"iPad8,6", "iPad Pro 12.9\" 3rd gen (A1876)",
"iPad8,7", "iPad Pro 12.9\" 3rd gen (A1895/A1983/A2014)",
"iPad8,8", "iPad Pro 12.9\" 3rd gen (A1895/A1983/A2014)",
"iPad8,9", "iPad Pro 11\" 2nd gen (Wifi)",
"iPad8,10", "iPad Pro 11\" 2nd gen (Wifi+Cellular)",
"iPad8,11", "iPad Pro 12.9\" 4th gen (Wifi)",
"iPad8,12", "iPad Pro 12.9\" 4th gen (Wifi+Cellular)",
"iPad11,1", "iPad Mini 5th gen (A2133)",
"iPad11,2", "iPad Mini 5th gen (A2124/A2125/A2126)",
"iPad11,3", "iPad Air 3rd gen (A2152)",
@ -325,4 +344,6 @@ static const char* DecodeIosDevice( const char* id )
return id;
}
#endif
}

View File

@ -222,9 +222,9 @@ static const char* GetModuleName( uint64_t addr )
return "[unknown]";
}
SymbolData DecodeSymbolAddress( uint64_t ptr )
CallstackSymbolData DecodeSymbolAddress( uint64_t ptr )
{
SymbolData sym;
CallstackSymbolData sym;
IMAGEHLP_LINE64 line;
DWORD displacement = 0;
line.SizeOfStruct = sizeof(IMAGEHLP_LINE64);
@ -242,9 +242,9 @@ SymbolData DecodeSymbolAddress( uint64_t ptr )
return sym;
}
SymbolData DecodeCodeAddress( uint64_t ptr )
CallstackSymbolData DecodeCodeAddress( uint64_t ptr )
{
SymbolData sym;
CallstackSymbolData sym;
const auto proc = GetCurrentProcess();
bool done = false;
@ -442,14 +442,10 @@ const char* DecodeCallstackPtrFast( uint64_t ptr )
static int SymbolAddressDataCb( void* data, uintptr_t pc, uintptr_t lowaddr, const char* fn, int lineno, const char* function )
{
auto& sym = *(SymbolData*)data;
auto& sym = *(CallstackSymbolData*)data;
if( !fn )
{
const char* symloc = nullptr;
Dl_info dlinfo;
if( dladdr( (void*)pc, &dlinfo ) ) symloc = dlinfo.dli_fname;
if( !symloc ) symloc = "[unknown]";
sym.file = symloc;
sym.file = "[unknown]";
sym.line = 0;
sym.needFree = false;
}
@ -465,20 +461,20 @@ static int SymbolAddressDataCb( void* data, uintptr_t pc, uintptr_t lowaddr, con
static void SymbolAddressErrorCb( void* data, const char* /*msg*/, int /*errnum*/ )
{
auto& sym = *(SymbolData*)data;
auto& sym = *(CallstackSymbolData*)data;
sym.file = "[unknown]";
sym.line = 0;
sym.needFree = false;
}
SymbolData DecodeSymbolAddress( uint64_t ptr )
CallstackSymbolData DecodeSymbolAddress( uint64_t ptr )
{
SymbolData sym;
CallstackSymbolData sym;
backtrace_pcinfo( cb_bts, ptr, SymbolAddressDataCb, SymbolAddressErrorCb, &sym );
return sym;
}
SymbolData DecodeCodeAddress( uint64_t ptr )
CallstackSymbolData DecodeCodeAddress( uint64_t ptr )
{
return DecodeSymbolAddress( ptr );
}
@ -494,14 +490,12 @@ static int CallstackDataCb( void* /*data*/, uintptr_t pc, uintptr_t lowaddr, con
if( !fn && !function )
{
const char* symname = nullptr;
const char* symloc = nullptr;
auto vptr = (void*)pc;
ptrdiff_t symoff = 0;
Dl_info dlinfo;
if( dladdr( vptr, &dlinfo ) )
{
symloc = dlinfo.dli_fname;
symname = dlinfo.dli_sname;
symoff = (char*)pc - (char*)dlinfo.dli_saddr;
@ -518,7 +512,6 @@ static int CallstackDataCb( void* /*data*/, uintptr_t pc, uintptr_t lowaddr, con
}
if( !symname ) symname = "[unknown]";
if( !symloc ) symloc = "[unknown]";
if( symoff == 0 )
{
@ -536,15 +529,7 @@ static int CallstackDataCb( void* /*data*/, uintptr_t pc, uintptr_t lowaddr, con
cb_data[cb_num].name = name;
}
char buf[32];
const auto addrlen = sprintf( buf, " [%p]", (void*)pc );
const auto loclen = strlen( symloc );
auto loc = (char*)tracy_malloc( loclen + addrlen + 1 );
memcpy( loc, symloc, loclen );
memcpy( loc + loclen, buf, addrlen );
loc[loclen + addrlen] = '\0';
cb_data[cb_num].file = loc;
cb_data[cb_num].file = CopyString( "[unknown]" );
cb_data[cb_num].line = 0;
}
else
@ -652,16 +637,16 @@ const char* DecodeCallstackPtrFast( uint64_t ptr )
return ret;
}
SymbolData DecodeSymbolAddress( uint64_t ptr )
CallstackSymbolData DecodeSymbolAddress( uint64_t ptr )
{
const char* symloc = nullptr;
Dl_info dlinfo;
if( dladdr( (void*)ptr, &dlinfo ) ) symloc = dlinfo.dli_fname;
if( !symloc ) symloc = "[unknown]";
return SymbolData { symloc, 0, false };
return CallstackSymbolData { symloc, 0, false };
}
SymbolData DecodeCodeAddress( uint64_t ptr )
CallstackSymbolData DecodeCodeAddress( uint64_t ptr )
{
return DecodeSymbolAddress( ptr );
}
@ -717,15 +702,7 @@ CallstackEntryData DecodeCallstackPtr( uint64_t ptr )
cb.name = name;
}
char buf[32];
const auto addrlen = sprintf( buf, " [%p]", (void*)ptr );
const auto loclen = strlen( symloc );
auto loc = (char*)tracy_malloc( loclen + addrlen + 1 );
memcpy( loc, symloc, loclen );
memcpy( loc + loclen, buf, addrlen );
loc[loclen + addrlen] = '\0';
cb.file = loc;
cb.file = CopyString( "[unknown]" );
cb.symLen = 0;
cb.symAddr = (uint64_t)symaddr;

View File

@ -22,7 +22,7 @@
namespace tracy
{
struct SymbolData
struct CallstackSymbolData
{
const char* file;
uint32_t line;
@ -45,8 +45,8 @@ struct CallstackEntryData
const char* imageName;
};
SymbolData DecodeSymbolAddress( uint64_t ptr );
SymbolData DecodeCodeAddress( uint64_t ptr );
CallstackSymbolData DecodeSymbolAddress( uint64_t ptr );
CallstackSymbolData DecodeCodeAddress( uint64_t ptr );
const char* DecodeCallstackPtrFast( uint64_t ptr );
CallstackEntryData DecodeCallstackPtr( uint64_t ptr );
void InitCallstack();

View File

@ -172,6 +172,12 @@ static tracy_force_inline uint64_t ProcessRGB( const uint8_t* src )
return uint64_t( to565( src[0], src[1], src[2] ) ) << 16;
}
__m128i amask = _mm_set1_epi32( 0xFFFFFF );
px0 = _mm_and_si128( px0, amask );
px1 = _mm_and_si128( px1, amask );
px2 = _mm_and_si128( px2, amask );
px3 = _mm_and_si128( px3, amask );
__m128i min0 = _mm_min_epu8( px0, px1 );
__m128i min1 = _mm_min_epu8( px2, px3 );
__m128i min2 = _mm_min_epu8( min0, min1 );
@ -492,6 +498,12 @@ static tracy_force_inline void ProcessRGB_AVX( const uint8_t* src, char*& dst )
return;
}
__m256i amask = _mm256_set1_epi32( 0xFFFFFF );
px0 = _mm256_and_si256( px0, amask );
px1 = _mm256_and_si256( px1, amask );
px2 = _mm256_and_si256( px2, amask );
px3 = _mm256_and_si256( px3, amask );
__m256i min0 = _mm256_min_epu8( px0, px1 );
__m256i min1 = _mm256_min_epu8( px2, px3 );
__m256i min2 = _mm256_min_epu8( min0, min1 );

View File

@ -23,7 +23,8 @@ public:
{
assert( m_id != std::numeric_limits<uint32_t>::max() );
TracyLfqPrepare( QueueType::LockAnnounce );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockAnnounce );
MemWrite( &item->lockAnnounce.id, m_id );
MemWrite( &item->lockAnnounce.time, Profiler::GetTime() );
MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc );
@ -31,7 +32,7 @@ public:
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
TracyLfqCommit;
Profiler::QueueSerialFinish();
}
LockableCtx( const LockableCtx& ) = delete;
@ -39,14 +40,14 @@ public:
tracy_force_inline ~LockableCtx()
{
TracyLfqPrepare( QueueType::LockTerminate );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockTerminate );
MemWrite( &item->lockTerminate.id, m_id );
MemWrite( &item->lockTerminate.time, Profiler::GetTime() );
MemWrite( &item->lockTerminate.type, LockType::Lockable );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
TracyLfqCommit;
Profiler::QueueSerialFinish();
}
tracy_force_inline bool BeforeLock()
@ -69,7 +70,6 @@ public:
MemWrite( &item->lockWait.thread, GetThreadHandle() );
MemWrite( &item->lockWait.id, m_id );
MemWrite( &item->lockWait.time, Profiler::GetTime() );
MemWrite( &item->lockWait.type, LockType::Lockable );
Profiler::QueueSerialFinish();
return true;
}
@ -155,16 +155,18 @@ public:
tracy_force_inline void CustomName( const char* name, size_t size )
{
auto ptr = (char*)tracy_malloc( size+1 );
assert( size < std::numeric_limits<uint16_t>::max() );
auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, name, size );
ptr[size] = '\0';
TracyLfqPrepare( QueueType::LockName );
MemWrite( &item->lockName.id, m_id );
MemWrite( &item->lockName.name, (uint64_t)ptr );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockName );
MemWrite( &item->lockNameFat.id, m_id );
MemWrite( &item->lockNameFat.name, (uint64_t)ptr );
MemWrite( &item->lockNameFat.size, (uint16_t)size );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
TracyLfqCommit;
Profiler::QueueSerialFinish();
}
private:
@ -236,17 +238,16 @@ public:
{
assert( m_id != std::numeric_limits<uint32_t>::max() );
TracyLfqPrepare( QueueType::LockAnnounce );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockAnnounce );
MemWrite( &item->lockAnnounce.id, m_id );
MemWrite( &item->lockAnnounce.time, Profiler::GetTime() );
MemWrite( &item->lockAnnounce.lckloc, (uint64_t)srcloc );
MemWrite( &item->lockAnnounce.type, LockType::SharedLockable );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
TracyLfqCommit;
Profiler::QueueSerialFinish();
}
SharedLockableCtx( const SharedLockableCtx& ) = delete;
@ -254,16 +255,14 @@ public:
tracy_force_inline ~SharedLockableCtx()
{
TracyLfqPrepare( QueueType::LockTerminate );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockTerminate );
MemWrite( &item->lockTerminate.id, m_id );
MemWrite( &item->lockTerminate.time, Profiler::GetTime() );
MemWrite( &item->lockTerminate.type, LockType::SharedLockable );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
TracyLfqCommit;
Profiler::QueueSerialFinish();
}
tracy_force_inline bool BeforeLock()
@ -286,7 +285,6 @@ public:
MemWrite( &item->lockWait.thread, GetThreadHandle() );
MemWrite( &item->lockWait.id, m_id );
MemWrite( &item->lockWait.time, Profiler::GetTime() );
MemWrite( &item->lockWait.type, LockType::SharedLockable );
Profiler::QueueSerialFinish();
return true;
}
@ -369,7 +367,6 @@ public:
MemWrite( &item->lockWait.thread, GetThreadHandle() );
MemWrite( &item->lockWait.id, m_id );
MemWrite( &item->lockWait.time, Profiler::GetTime() );
MemWrite( &item->lockWait.type, LockType::SharedLockable );
Profiler::QueueSerialFinish();
return true;
}
@ -455,16 +452,18 @@ public:
tracy_force_inline void CustomName( const char* name, size_t size )
{
auto ptr = (char*)tracy_malloc( size+1 );
assert( size < std::numeric_limits<uint16_t>::max() );
auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, name, size );
ptr[size] = '\0';
TracyLfqPrepare( QueueType::LockName );
MemWrite( &item->lockName.id, m_id );
MemWrite( &item->lockName.name, (uint64_t)ptr );
auto item = Profiler::QueueSerial();
MemWrite( &item->hdr.type, QueueType::LockName );
MemWrite( &item->lockNameFat.id, m_id );
MemWrite( &item->lockNameFat.name, (uint64_t)ptr );
MemWrite( &item->lockNameFat.size, (uint16_t)size );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
#endif
TracyLfqCommit;
Profiler::QueueSerialFinish();
}
private:

View File

@ -37,6 +37,10 @@
# include <sys/sysctl.h>
#endif
#if defined __APPLE__
# include "TargetConditionals.h"
#endif
#include <algorithm>
#include <assert.h>
#include <atomic>
@ -80,11 +84,6 @@
# endif
#endif
#if defined TRACY_HW_TIMER && __ARM_ARCH >= 6 && !defined TARGET_OS_IOS
# include <signal.h>
# include <setjmp.h>
#endif
#if defined _WIN32 || defined __CYGWIN__
# include <lmcons.h>
extern "C" typedef LONG (WINAPI *t_RtlGetVersion)( PRTL_OSVERSIONINFOW );
@ -93,9 +92,6 @@ extern "C" typedef BOOL (WINAPI *t_GetLogicalProcessorInformationEx)( LOGICAL_PR
# include <unistd.h>
# include <limits.h>
#endif
#if defined __APPLE__
# include "TargetConditionals.h"
#endif
#if defined __linux__
# include <sys/sysinfo.h>
# include <sys/utsname.h>
@ -231,7 +227,11 @@ static int64_t SetupHwTimer()
const char* noCheck = getenv( "TRACY_NO_INVARIANT_CHECK" );
if( !noCheck || noCheck[0] != '1' )
{
#if defined _WIN32 || defined __CYGWIN__
InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*.\nAlternatively you may rebuild the application with the TRACY_TIMER_QPC define to use lower resolution timer." );
#else
InitFailure( "CPU doesn't support invariant TSC.\nDefine TRACY_NO_INVARIANT_CHECK=1 to ignore this error, *if you know what you are doing*." );
#endif
}
}
#endif
@ -270,6 +270,7 @@ static const char* GetProcessName()
return processName;
}
#if defined __linux__ && defined __ARM_ARCH
static uint32_t GetHex( char*& ptr, int skip )
{
uint32_t ret;
@ -287,6 +288,7 @@ static uint32_t GetHex( char*& ptr, int skip )
ptr = end;
return ret;
}
#endif
static const char* GetHostInfo()
{
@ -540,6 +542,8 @@ static char s_crashText[1024];
LONG WINAPI CrashFilter( PEXCEPTION_POINTERS pExp )
{
if( !GetProfiler().IsConnected() ) return EXCEPTION_CONTINUE_SEARCH;
const unsigned ec = pExp->ExceptionRecord->ExceptionCode;
auto msgPtr = s_crashText;
switch( ec )
@ -590,12 +594,12 @@ LONG WINAPI CrashFilter( PEXCEPTION_POINTERS pExp )
}
{
GetProfiler().SendCallstack( 60, "KiUserExceptionDispatcher" );
TracyLfqPrepare( QueueType::CrashReport );
item->crashReport.time = Profiler::GetTime();
item->crashReport.text = (uint64_t)s_crashText;
TracyLfqCommit;
GetProfiler().SendCallstack( 60, "KiUserExceptionDispatcher" );
}
HANDLE h = CreateToolhelp32Snapshot( TH32CS_SNAPTHREAD, 0 );
@ -822,12 +826,12 @@ static void CrashHandler( int signal, siginfo_t* info, void* /*ucontext*/ )
}
{
GetProfiler().SendCallstack( 60, "__kernel_rt_sigreturn" );
TracyLfqPrepare( QueueType::CrashReport );
item->crashReport.time = Profiler::GetTime();
item->crashReport.text = (uint64_t)s_crashText;
TracyLfqCommit;
GetProfiler().SendCallstack( 60, "__kernel_rt_sigreturn" );
}
DIR* dp = opendir( "/proc/self/task" );
@ -863,7 +867,7 @@ static void CrashHandler( int signal, siginfo_t* info, void* /*ucontext*/ )
enum { QueuePrealloc = 256 * 1024 };
static Profiler* s_instance;
static Profiler* s_instance = nullptr;
static Thread* s_thread;
static Thread* s_compressThread;
@ -871,6 +875,19 @@ static Thread* s_compressThread;
static Thread* s_sysTraceThread = nullptr;
#endif
// Reports whether the file-level s_instance pointer currently refers to a
// Profiler object (true) or is still null (false).
TRACY_API bool ProfilerAvailable()
{
    return s_instance != nullptr;
}
// Returns the value reported by QueryPerformanceFrequency() on Windows and
// Cygwin builds. Every other platform has no such counter here and gets 0.
TRACY_API int64_t GetFrequencyQpc()
{
#if !( defined _WIN32 || defined __CYGWIN__ )
    return 0;
#else
    LARGE_INTEGER frequency;
    QueryPerformanceFrequency( &frequency );
    return frequency.QuadPart;
#endif
}
#ifdef TRACY_DELAYED_INIT
struct ThreadNameData;
TRACY_API moodycamel::ConcurrentQueue<QueueItem>& GetQueue();
@ -911,6 +928,25 @@ struct ProfilerThreadData
# endif
};
# ifdef TRACY_MANUAL_LIFETIME
ProfilerData* s_profilerData = nullptr;
// Manual-lifetime entry point: allocates the global ProfilerData object and then
// calls SpawnWorkerThreads() on the Profiler it contains.
TRACY_API void StartupProfiler()
{
// The global pointer is assigned before the threads are spawned.
s_profilerData = new ProfilerData;
s_profilerData->profiler.SpawnWorkerThreads();
}
// Accessor for the manually managed ProfilerData object. Asserts that the
// global pointer has been set before handing out a reference to it.
static ProfilerData& GetProfilerData()
{
    ProfilerData* const data = s_profilerData;
    assert( data );
    return *data;
}
// Manual-lifetime teardown: deletes the global ProfilerData object, clears the
// global pointer, then calls rpmalloc_finalize().
TRACY_API void ShutdownProfiler()
{
delete s_profilerData;
// Null the pointer so a later GetProfilerData() hits its assert rather than a dangling object.
s_profilerData = nullptr;
rpmalloc_finalize();
}
# else
static std::atomic<int> profilerDataLock { 0 };
static std::atomic<ProfilerData*> profilerData { nullptr };
@ -932,6 +968,7 @@ static ProfilerData& GetProfilerData()
}
return *ptr;
}
# endif
static ProfilerThreadData& GetProfilerThreadData()
{
@ -953,10 +990,12 @@ std::atomic<ThreadNameData*>& GetThreadNameData() { return GetProfilerData().thr
TRACY_API LuaZoneState& GetLuaZoneState() { return GetProfilerThreadData().luaZoneState; }
# endif
# ifndef TRACY_MANUAL_LIFETIME
namespace
{
const auto& __profiler_init = GetProfiler();
}
# endif
#else
TRACY_API void InitRPMallocThread()
@ -1044,8 +1083,8 @@ Profiler::Profiler()
, m_fiQueue( 16 )
, m_fiDequeue( 16 )
, m_frameCount( 0 )
#ifdef TRACY_ON_DEMAND
, m_isConnected( false )
#ifdef TRACY_ON_DEMAND
, m_connectionId( 0 )
, m_deferredQueue( 64*1024 )
#endif
@ -1081,6 +1120,13 @@ Profiler::Profiler()
m_userPort = atoi( userPort );
}
#if !defined(TRACY_DELAYED_INIT) || !defined(TRACY_MANUAL_LIFETIME)
SpawnWorkerThreads();
#endif
}
void Profiler::SpawnWorkerThreads()
{
s_thread = (Thread*)tracy_malloc( sizeof( Thread ) );
new(s_thread) Thread( LaunchWorker, this );
@ -1172,6 +1218,8 @@ void Profiler::Worker()
s_profilerTid = syscall( SYS_gettid );
#endif
ThreadExitHandler threadExitHandler;
SetThreadName( "Tracy Profiler" );
#ifdef TRACY_DATA_PORT
@ -1223,6 +1271,12 @@ void Profiler::Worker()
uint8_t cpuArch = CpuArchUnknown;
#endif
#ifdef TRACY_NO_CODE_TRANSFER
uint8_t codeTransfer = 0;
#else
uint8_t codeTransfer = 1;
#endif
#if defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64
uint32_t regs[4];
char manufacturer[12];
@ -1250,6 +1304,7 @@ void Profiler::Worker()
MemWrite( &welcome.onDemand, onDemand );
MemWrite( &welcome.isApple, isApple );
MemWrite( &welcome.cpuArch, cpuArch );
MemWrite( &welcome.codeTransfer, codeTransfer );
memcpy( welcome.cpuManufacturer, manufacturer, 12 );
MemWrite( &welcome.cpuId, cpuId );
memcpy( welcome.programName, procname, pnsz );
@ -1318,6 +1373,11 @@ void Profiler::Worker()
#ifndef TRACY_NO_EXIT
if( !m_noExit && ShouldExit() )
{
if( m_broadcast )
{
broadcastMsg.activeTime = -1;
m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen );
}
m_shutdownFinished.store( true, std::memory_order_relaxed );
return;
}
@ -1335,12 +1395,20 @@ void Profiler::Worker()
{
lastBroadcast = t;
const auto ts = std::chrono::duration_cast<std::chrono::seconds>( std::chrono::system_clock::now().time_since_epoch() ).count();
broadcastMsg.activeTime = uint32_t( ts - m_epoch );
broadcastMsg.activeTime = int32_t( ts - m_epoch );
assert( broadcastMsg.activeTime >= 0 );
m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen );
}
}
}
if( m_broadcast )
{
lastBroadcast = 0;
broadcastMsg.activeTime = -1;
m_broadcast->Send( broadcastPort, &broadcastMsg, broadcastLen );
}
// Handshake
{
char shibboleth[HandshakeShibbolethSize];
@ -1378,8 +1446,8 @@ void Profiler::Worker()
const auto currentTime = GetTime();
ClearQueues( token );
m_connectionId.fetch_add( 1, std::memory_order_release );
m_isConnected.store( true, std::memory_order_release );
#endif
m_isConnected.store( true, std::memory_order_release );
HandshakeStatus handshake = HandshakeWelcome;
m_sock->Send( &handshake, sizeof( handshake ) );
@ -1403,16 +1471,19 @@ void Profiler::Worker()
for( auto& item : m_deferredQueue )
{
uint64_t ptr;
uint16_t size;
const auto idx = MemRead<uint8_t>( &item.hdr.idx );
switch( (QueueType)idx )
{
case QueueType::MessageAppInfo:
ptr = MemRead<uint64_t>( &item.message.text );
SendString( ptr, (const char*)ptr, QueueType::CustomStringData );
ptr = MemRead<uint64_t>( &item.messageFat.text );
size = MemRead<uint16_t>( &item.messageFat.size );
SendSingleString( (const char*)ptr, size );
break;
case QueueType::LockName:
ptr = MemRead<uint64_t>( &item.lockName.name );
SendString( ptr, (const char*)ptr, QueueType::CustomStringData );
ptr = MemRead<uint64_t>( &item.lockNameFat.name );
size = MemRead<uint16_t>( &item.lockNameFat.size );
SendSingleString( (const char*)ptr, size );
break;
default:
break;
@ -1469,8 +1540,8 @@ void Profiler::Worker()
}
if( ShouldExit() ) break;
#ifdef TRACY_ON_DEMAND
m_isConnected.store( false, std::memory_order_release );
#ifdef TRACY_ON_DEMAND
m_bufferOffset = 0;
m_bufferStart = 0;
#endif
@ -1592,6 +1663,8 @@ void Profiler::Worker()
void Profiler::CompressWorker()
{
ThreadExitHandler threadExitHandler;
SetThreadName( "Tracy DXT1" );
while( m_timeBegin.load( std::memory_order_relaxed ) == 0 ) std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
rpmalloc_thread_initialize();
@ -1631,12 +1704,12 @@ void Profiler::CompressWorker()
tracy_free( fi->image );
TracyLfqPrepare( QueueType::FrameImage );
MemWrite( &item->frameImage.image, (uint64_t)etc1buf );
MemWrite( &item->frameImage.frame, fi->frame );
MemWrite( &item->frameImage.w, w );
MemWrite( &item->frameImage.h, h );
MemWrite( &item->frameImageFat.image, (uint64_t)etc1buf );
MemWrite( &item->frameImageFat.frame, fi->frame );
MemWrite( &item->frameImageFat.w, w );
MemWrite( &item->frameImageFat.h, h );
uint8_t flip = fi->flip;
MemWrite( &item->frameImage.flip, flip );
MemWrite( &item->frameImageFat.flip, flip );
TracyLfqCommit;
fi++;
@ -1664,7 +1737,7 @@ static void FreeAssociatedMemory( const QueueItem& item )
{
case QueueType::ZoneText:
case QueueType::ZoneName:
ptr = MemRead<uint64_t>( &item.zoneText.text );
ptr = MemRead<uint64_t>( &item.zoneTextFat.text );
tracy_free( (void*)ptr );
break;
case QueueType::Message:
@ -1674,7 +1747,7 @@ static void FreeAssociatedMemory( const QueueItem& item )
#ifndef TRACY_ON_DEMAND
case QueueType::MessageAppInfo:
#endif
ptr = MemRead<uint64_t>( &item.message.text );
ptr = MemRead<uint64_t>( &item.messageFat.text );
tracy_free( (void*)ptr );
break;
case QueueType::ZoneBeginAllocSrcLoc:
@ -1683,30 +1756,27 @@ static void FreeAssociatedMemory( const QueueItem& item )
tracy_free( (void*)ptr );
break;
case QueueType::CallstackMemory:
ptr = MemRead<uint64_t>( &item.callstackMemory.ptr );
tracy_free( (void*)ptr );
break;
case QueueType::Callstack:
ptr = MemRead<uint64_t>( &item.callstack.ptr );
ptr = MemRead<uint64_t>( &item.callstackFat.ptr );
tracy_free( (void*)ptr );
break;
case QueueType::CallstackAlloc:
ptr = MemRead<uint64_t>( &item.callstackAlloc.nativePtr );
ptr = MemRead<uint64_t>( &item.callstackAllocFat.nativePtr );
tracy_free( (void*)ptr );
ptr = MemRead<uint64_t>( &item.callstackAlloc.ptr );
ptr = MemRead<uint64_t>( &item.callstackAllocFat.ptr );
tracy_free( (void*)ptr );
break;
case QueueType::CallstackSample:
ptr = MemRead<uint64_t>( &item.callstackSample.ptr );
ptr = MemRead<uint64_t>( &item.callstackSampleFat.ptr );
tracy_free( (void*)ptr );
break;
case QueueType::FrameImage:
ptr = MemRead<uint64_t>( &item.frameImage.image );
ptr = MemRead<uint64_t>( &item.frameImageFat.image );
tracy_free( (void*)ptr );
break;
#ifndef TRACY_ON_DEMAND
case QueueType::LockName:
ptr = MemRead<uint64_t>( &item.lockName.name );
ptr = MemRead<uint64_t>( &item.lockNameFat.name );
tracy_free( (void*)ptr );
break;
#endif
@ -1779,6 +1849,7 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
while( sz-- > 0 )
{
uint64_t ptr;
uint16_t size;
auto idx = MemRead<uint8_t>( &item->hdr.idx );
if( idx < (int)QueueType::Terminate )
{
@ -1786,21 +1857,29 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
{
case QueueType::ZoneText:
case QueueType::ZoneName:
ptr = MemRead<uint64_t>( &item->zoneText.text );
SendString( ptr, (const char*)ptr, QueueType::CustomStringData );
ptr = MemRead<uint64_t>( &item->zoneTextFat.text );
size = MemRead<uint16_t>( &item->zoneTextFat.size );
SendSingleString( (const char*)ptr, size );
tracy_free( (void*)ptr );
break;
case QueueType::Message:
case QueueType::MessageColor:
case QueueType::MessageCallstack:
ptr = MemRead<uint64_t>( &item->messageFat.text );
size = MemRead<uint16_t>( &item->messageFat.size );
SendSingleString( (const char*)ptr, size );
tracy_free( (void*)ptr );
break;
case QueueType::MessageColor:
case QueueType::MessageColorCallstack:
ptr = MemRead<uint64_t>( &item->message.text );
SendString( ptr, (const char*)ptr, QueueType::CustomStringData );
ptr = MemRead<uint64_t>( &item->messageColorFat.text );
size = MemRead<uint16_t>( &item->messageColorFat.size );
SendSingleString( (const char*)ptr, size );
tracy_free( (void*)ptr );
break;
case QueueType::MessageAppInfo:
ptr = MemRead<uint64_t>( &item->message.text );
SendString( ptr, (const char*)ptr, QueueType::CustomStringData );
ptr = MemRead<uint64_t>( &item->messageFat.text );
size = MemRead<uint16_t>( &item->messageFat.size );
SendSingleString( (const char*)ptr, size );
#ifndef TRACY_ON_DEMAND
tracy_free( (void*)ptr );
#endif
@ -1815,54 +1894,44 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
ptr = MemRead<uint64_t>( &item->zoneBegin.srcloc );
SendSourceLocationPayload( ptr );
tracy_free( (void*)ptr );
idx++;
MemWrite( &item->hdr.idx, idx );
break;
}
case QueueType::Callstack:
ptr = MemRead<uint64_t>( &item->callstack.ptr );
ptr = MemRead<uint64_t>( &item->callstackFat.ptr );
SendCallstackPayload( ptr );
tracy_free( (void*)ptr );
idx++;
MemWrite( &item->hdr.idx, idx );
break;
case QueueType::CallstackAlloc:
ptr = MemRead<uint64_t>( &item->callstackAlloc.nativePtr );
ptr = MemRead<uint64_t>( &item->callstackAllocFat.nativePtr );
if( ptr != 0 )
{
CutCallstack( (void*)ptr, "lua_pcall" );
SendCallstackPayload( ptr );
tracy_free( (void*)ptr );
}
ptr = MemRead<uint64_t>( &item->callstackAlloc.ptr );
ptr = MemRead<uint64_t>( &item->callstackAllocFat.ptr );
SendCallstackAlloc( ptr );
tracy_free( (void*)ptr );
idx++;
MemWrite( &item->hdr.idx, idx );
break;
case QueueType::CallstackSample:
{
ptr = MemRead<uint64_t>( &item->callstackSample.ptr );
ptr = MemRead<uint64_t>( &item->callstackSampleFat.ptr );
SendCallstackPayload64( ptr );
tracy_free( (void*)ptr );
int64_t t = MemRead<int64_t>( &item->callstackSample.time );
int64_t t = MemRead<int64_t>( &item->callstackSampleFat.time );
int64_t dt = t - refCtx;
refCtx = t;
MemWrite( &item->callstackSample.time, dt );
idx++;
MemWrite( &item->hdr.idx, idx );
MemWrite( &item->callstackSampleFat.time, dt );
break;
}
case QueueType::FrameImage:
{
ptr = MemRead<uint64_t>( &item->frameImage.image );
const auto w = MemRead<uint16_t>( &item->frameImage.w );
const auto h = MemRead<uint16_t>( &item->frameImage.h );
ptr = MemRead<uint64_t>( &item->frameImageFat.image );
const auto w = MemRead<uint16_t>( &item->frameImageFat.w );
const auto h = MemRead<uint16_t>( &item->frameImageFat.h );
const auto csz = size_t( w * h / 2 );
SendLongString( ptr, (const char*)ptr, csz, QueueType::FrameImageData );
tracy_free( (void*)ptr );
idx++;
MemWrite( &item->hdr.idx, idx );
break;
}
case QueueType::ZoneBegin:
@ -1882,13 +1951,6 @@ Profiler::DequeueStatus Profiler::Dequeue( moodycamel::ConsumerToken& token )
MemWrite( &item->zoneEnd.time, dt );
break;
}
case QueueType::LockName:
ptr = MemRead<uint64_t>( &item->lockName.name );
SendString( ptr, (const char*)ptr, QueueType::CustomStringData );
#ifndef TRACY_ON_DEMAND
tracy_free( (void*)ptr );
#endif
break;
case QueueType::GpuZoneBegin:
case QueueType::GpuZoneBeginCallstack:
{
@ -2056,11 +2118,9 @@ Profiler::DequeueStatus Profiler::DequeueSerial()
switch( (QueueType)idx )
{
case QueueType::CallstackMemory:
ptr = MemRead<uint64_t>( &item->callstackMemory.ptr );
ptr = MemRead<uint64_t>( &item->callstackFat.ptr );
SendCallstackPayload( ptr );
tracy_free( (void*)ptr );
idx++;
MemWrite( &item->hdr.idx, idx );
break;
case QueueType::LockWait:
case QueueType::LockSharedWait:
@ -2089,8 +2149,20 @@ Profiler::DequeueStatus Profiler::DequeueSerial()
MemWrite( &item->lockRelease.time, dt );
break;
}
case QueueType::LockName:
{
ptr = MemRead<uint64_t>( &item->lockNameFat.name );
uint16_t size = MemRead<uint16_t>( &item->lockNameFat.size );
SendSingleString( (const char*)ptr, size );
#ifndef TRACY_ON_DEMAND
tracy_free( (void*)ptr );
#endif
break;
}
case QueueType::MemAlloc:
case QueueType::MemAllocNamed:
case QueueType::MemAllocCallstack:
case QueueType::MemAllocCallstackNamed:
{
int64_t t = MemRead<int64_t>( &item->memAlloc.time );
int64_t dt = t - refSerial;
@ -2099,7 +2171,9 @@ Profiler::DequeueStatus Profiler::DequeueSerial()
break;
}
case QueueType::MemFree:
case QueueType::MemFreeNamed:
case QueueType::MemFreeCallstack:
case QueueType::MemFreeCallstackNamed:
{
int64_t t = MemRead<int64_t>( &item->memFree.time );
int64_t dt = t - refSerial;
@ -2166,11 +2240,10 @@ bool Profiler::SendData( const char* data, size_t len )
return m_sock->Send( m_lz4Buf, lz4sz + sizeof( lz4sz_t ) ) != -1;
}
void Profiler::SendString( uint64_t str, const char* ptr, QueueType type )
void Profiler::SendString( uint64_t str, const char* ptr, size_t len, QueueType type )
{
assert( type == QueueType::StringData ||
type == QueueType::ThreadName ||
type == QueueType::CustomStringData ||
type == QueueType::PlotName ||
type == QueueType::FrameName ||
type == QueueType::ExternalName ||
@ -2180,7 +2253,6 @@ void Profiler::SendString( uint64_t str, const char* ptr, QueueType type )
MemWrite( &item.hdr.type, type );
MemWrite( &item.stringTransfer.ptr, str );
auto len = strlen( ptr );
assert( len <= std::numeric_limits<uint16_t>::max() );
auto l16 = uint16_t( len );
@ -2191,6 +2263,36 @@ void Profiler::SendString( uint64_t str, const char* ptr, QueueType type )
AppendDataUnsafe( ptr, l16 );
}
void Profiler::SendSingleString( const char* ptr, size_t len )
{
QueueItem item;
MemWrite( &item.hdr.type, QueueType::SingleStringData );
assert( len <= std::numeric_limits<uint16_t>::max() );
auto l16 = uint16_t( len );
NeedDataSize( QueueDataSize[(int)QueueType::SingleStringData] + sizeof( l16 ) + l16 );
AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SingleStringData] );
AppendDataUnsafe( &l16, sizeof( l16 ) );
AppendDataUnsafe( ptr, l16 );
}
void Profiler::SendSecondString( const char* ptr, size_t len )
{
QueueItem item;
MemWrite( &item.hdr.type, QueueType::SecondStringData );
assert( len <= std::numeric_limits<uint16_t>::max() );
auto l16 = uint16_t( len );
NeedDataSize( QueueDataSize[(int)QueueType::SecondStringData] + sizeof( l16 ) + l16 );
AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SecondStringData] );
AppendDataUnsafe( &l16, sizeof( l16 ) );
AppendDataUnsafe( ptr, l16 );
}
void Profiler::SendLongString( uint64_t str, const char* ptr, size_t len, QueueType type )
{
assert( type == QueueType::FrameImageData ||
@ -2234,16 +2336,17 @@ void Profiler::SendSourceLocationPayload( uint64_t _ptr )
MemWrite( &item.hdr.type, QueueType::SourceLocationPayload );
MemWrite( &item.stringTransfer.ptr, _ptr );
const auto len = *((uint32_t*)ptr);
assert( len <= std::numeric_limits<uint16_t>::max() );
assert( len > 4 );
const auto l16 = uint16_t( len - 4 );
uint16_t len;
memcpy( &len, ptr, sizeof( len ) );
assert( len > 2 );
len -= 2;
ptr += 2;
NeedDataSize( QueueDataSize[(int)QueueType::SourceLocationPayload] + sizeof( l16 ) + l16 );
NeedDataSize( QueueDataSize[(int)QueueType::SourceLocationPayload] + sizeof( len ) + len );
AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::SourceLocationPayload] );
AppendDataUnsafe( &l16, sizeof( l16 ) );
AppendDataUnsafe( ptr + 4, l16 );
AppendDataUnsafe( &len, sizeof( len ) );
AppendDataUnsafe( ptr, len );
}
void Profiler::SendCallstackPayload( uint64_t _ptr )
@ -2304,15 +2407,15 @@ void Profiler::SendCallstackAlloc( uint64_t _ptr )
MemWrite( &item.hdr.type, QueueType::CallstackAllocPayload );
MemWrite( &item.stringTransfer.ptr, _ptr );
const auto len = *((uint32_t*)ptr);
assert( len <= std::numeric_limits<uint16_t>::max() );
const auto l16 = uint16_t( len );
uint16_t len;
memcpy( &len, ptr, 2 );
ptr += 2;
NeedDataSize( QueueDataSize[(int)QueueType::CallstackAllocPayload] + sizeof( l16 ) + l16 );
NeedDataSize( QueueDataSize[(int)QueueType::CallstackAllocPayload] + sizeof( len ) + len );
AppendDataUnsafe( &item, QueueDataSize[(int)QueueType::CallstackAllocPayload] );
AppendDataUnsafe( &l16, sizeof( l16 ) );
AppendDataUnsafe( ptr + 4, l16 );
AppendDataUnsafe( &len, sizeof( len ) );
AppendDataUnsafe( ptr, len );
}
void Profiler::SendCallstackFrame( uint64_t ptr )
@ -2321,13 +2424,12 @@ void Profiler::SendCallstackFrame( uint64_t ptr )
const auto frameData = DecodeCallstackPtr( ptr );
{
SendString( uint64_t( frameData.imageName ), frameData.imageName, QueueType::CustomStringData );
SendSingleString( frameData.imageName );
QueueItem item;
MemWrite( &item.hdr.type, QueueType::CallstackFrameSize );
MemWrite( &item.callstackFrameSize.ptr, ptr );
MemWrite( &item.callstackFrameSize.size, frameData.size );
MemWrite( &item.callstackFrameSize.imageName, (uint64_t)frameData.imageName );
AppendData( &item, QueueDataSize[(int)QueueType::CallstackFrameSize] );
}
@ -2336,23 +2438,14 @@ void Profiler::SendCallstackFrame( uint64_t ptr )
{
const auto& frame = frameData.data[i];
SendString( uint64_t( frame.name ), frame.name, QueueType::CustomStringData );
SendString( uint64_t( frame.file ), frame.file, QueueType::CustomStringData );
SendSingleString( frame.name );
SendSecondString( frame.file );
QueueItem item;
MemWrite( &item.hdr.type, QueueType::CallstackFrame );
MemWrite( &item.callstackFrame.name, (uint64_t)frame.name );
MemWrite( &item.callstackFrame.file, (uint64_t)frame.file );
MemWrite( &item.callstackFrame.line, frame.line );
MemWrite( &item.callstackFrame.symAddr, frame.symAddr );
if( frame.symLen > ( 1 << 24 ) )
{
memset( item.callstackFrame.symLen, 0, 3 );
}
else
{
memcpy( item.callstackFrame.symLen, &frame.symLen, 3 );
}
MemWrite( &item.callstackFrame.symLen, frame.symLen );
AppendData( &item, QueueDataSize[(int)QueueType::CallstackFrame] );
@ -2383,7 +2476,7 @@ bool Profiler::HandleServerQuery()
case ServerQueryThreadString:
if( ptr == m_mainThread )
{
SendString( ptr, "Main thread", QueueType::ThreadName );
SendString( ptr, "Main thread", 11, QueueType::ThreadName );
}
else
{
@ -2418,9 +2511,11 @@ bool Profiler::HandleServerQuery()
case ServerQuerySymbol:
HandleSymbolQuery( ptr );
break;
#ifndef TRACY_NO_CODE_TRANSFER
case ServerQuerySymbolCode:
HandleSymbolCodeQuery( ptr, extra );
break;
#endif
case ServerQueryCodeLocation:
SendCodeLocation( ptr );
break;
@ -2517,9 +2612,6 @@ void Profiler::HandleDisconnect()
void Profiler::CalibrateTimer()
{
#ifdef TRACY_HW_TIMER
# if !defined TARGET_OS_IOS && __ARM_ARCH >= 6
m_timerMul = 1.;
# else
std::atomic_signal_fence( std::memory_order_acq_rel );
const auto t0 = std::chrono::high_resolution_clock::now();
const auto r0 = GetTime();
@ -2534,7 +2626,6 @@ void Profiler::CalibrateTimer()
const auto dr = r1 - r0;
m_timerMul = double( dt ) / double( dr );
# endif
#else
m_timerMul = 1.;
#endif
@ -2542,7 +2633,7 @@ void Profiler::CalibrateTimer()
void Profiler::CalibrateDelay()
{
enum { Iterations = 50000 };
constexpr int Iterations = 50000;
auto mindiff = std::numeric_limits<int64_t>::max();
for( int i=0; i<Iterations * 10; i++ )
@ -2557,7 +2648,7 @@ void Profiler::CalibrateDelay()
#ifdef TRACY_DELAYED_INIT
m_delay = m_resolution;
#else
enum { Events = Iterations * 2 }; // start + end
constexpr int Events = Iterations * 2; // start + end
static_assert( Events < QueuePrealloc, "Delay calibration loop will allocate memory in queue" );
static const tracy::SourceLocationData __tracy_source_location { nullptr, __FUNCTION__, __FILE__, (uint32_t)__LINE__, 0 };
@ -2692,12 +2783,16 @@ void Profiler::ReportTopology()
sprintf( path, "%s%i/topology/physical_package_id", basePath, i );
char buf[1024];
FILE* f = fopen( path, "rb" );
if( !f )
{
tracy_free( cpuData );
return;
}
auto read = fread( buf, 1, 1024, f );
buf[read] = '\0';
fclose( f );
cpuData[i].package = uint32_t( atoi( buf ) );
cpuData[i].thread = i;
sprintf( path, "%s%i/topology/core_id", basePath, i );
f = fopen( path, "rb" );
read = fread( buf, 1, 1024, f );
@ -2730,11 +2825,10 @@ void Profiler::ReportTopology()
void Profiler::SendCallstack( int depth, const char* skipBefore )
{
#ifdef TRACY_HAS_CALLSTACK
TracyLfqPrepare( QueueType::Callstack );
auto ptr = Callstack( depth );
CutCallstack( ptr, skipBefore );
TracyLfqPrepare( QueueType::Callstack );
MemWrite( &item->callstack.ptr, (uint64_t)ptr );
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
TracyLfqCommit;
#endif
}
@ -2800,11 +2894,10 @@ void Profiler::HandleSymbolQuery( uint64_t symbol )
#ifdef TRACY_HAS_CALLSTACK
const auto sym = DecodeSymbolAddress( symbol );
SendString( uint64_t( sym.file ), sym.file, QueueType::CustomStringData );
SendSingleString( sym.file );
QueueItem item;
MemWrite( &item.hdr.type, QueueType::SymbolInformation );
MemWrite( &item.symbolInformation.file, uint64_t( sym.file ) );
MemWrite( &item.symbolInformation.line, sym.line );
MemWrite( &item.symbolInformation.symAddr, symbol );
@ -2824,12 +2917,11 @@ void Profiler::SendCodeLocation( uint64_t ptr )
#ifdef TRACY_HAS_CALLSTACK
const auto sym = DecodeCodeAddress( ptr );
SendString( uint64_t( sym.file ), sym.file, QueueType::CustomStringData );
SendSingleString( sym.file );
QueueItem item;
MemWrite( &item.hdr.type, QueueType::CodeInformation );
MemWrite( &item.codeInformation.ptr, ptr );
MemWrite( &item.codeInformation.file, uint64_t( sym.file ) );
MemWrite( &item.codeInformation.line, sym.line );
AppendData( &item, QueueDataSize[(int)QueueType::CodeInformation] );
@ -2900,14 +2992,13 @@ TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_callstack( const struct ___trac
TracyLfqCommitC;
}
#endif
tracy::GetProfiler().SendCallstack( depth );
{
TracyLfqPrepareC( tracy::QueueType::ZoneBeginCallstack );
tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
tracy::MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
TracyLfqCommitC;
}
tracy::GetProfiler().SendCallstack( depth );
return ctx;
}
@ -2966,14 +3057,13 @@ TRACY_API TracyCZoneCtx ___tracy_emit_zone_begin_alloc_callstack( uint64_t srclo
TracyLfqCommitC;
}
#endif
tracy::GetProfiler().SendCallstack( depth );
{
TracyLfqPrepareC( tracy::QueueType::ZoneBeginAllocSrcLocCallstack );
tracy::MemWrite( &item->zoneBegin.time, tracy::Profiler::GetTime() );
tracy::MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyLfqCommitC;
}
tracy::GetProfiler().SendCallstack( depth );
return ctx;
}
@ -2996,10 +3086,10 @@ TRACY_API void ___tracy_emit_zone_end( TracyCZoneCtx ctx )
TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size_t size )
{
assert( size < std::numeric_limits<uint16_t>::max() );
if( !ctx.active ) return;
auto ptr = (char*)tracy::tracy_malloc( size+1 );
auto ptr = (char*)tracy::tracy_malloc( size );
memcpy( ptr, txt, size );
ptr[size] = '\0';
#ifndef TRACY_NO_VERIFY
{
TracyLfqPrepareC( tracy::QueueType::ZoneValidation );
@ -3009,17 +3099,18 @@ TRACY_API void ___tracy_emit_zone_text( TracyCZoneCtx ctx, const char* txt, size
#endif
{
TracyLfqPrepareC( tracy::QueueType::ZoneText );
tracy::MemWrite( &item->zoneText.text, (uint64_t)ptr );
tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size );
TracyLfqCommitC;
}
}
TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size_t size )
{
assert( size < std::numeric_limits<uint16_t>::max() );
if( !ctx.active ) return;
auto ptr = (char*)tracy::tracy_malloc( size+1 );
auto ptr = (char*)tracy::tracy_malloc( size );
memcpy( ptr, txt, size );
ptr[size] = '\0';
#ifndef TRACY_NO_VERIFY
{
TracyLfqPrepareC( tracy::QueueType::ZoneValidation );
@ -3029,7 +3120,8 @@ TRACY_API void ___tracy_emit_zone_name( TracyCZoneCtx ctx, const char* txt, size
#endif
{
TracyLfqPrepareC( tracy::QueueType::ZoneName );
tracy::MemWrite( &item->zoneText.text, (uint64_t)ptr );
tracy::MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
tracy::MemWrite( &item->zoneTextFat.size, (uint16_t)size );
TracyLfqCommitC;
}
}
@ -3051,10 +3143,10 @@ TRACY_API void ___tracy_emit_zone_value( TracyCZoneCtx ctx, uint64_t value )
}
}
TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size ) { tracy::Profiler::MemAlloc( ptr, size ); }
TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth ) { tracy::Profiler::MemAllocCallstack( ptr, size, depth ); }
TRACY_API void ___tracy_emit_memory_free( const void* ptr ) { tracy::Profiler::MemFree( ptr ); }
TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth ) { tracy::Profiler::MemFreeCallstack( ptr, depth ); }
TRACY_API void ___tracy_emit_memory_alloc( const void* ptr, size_t size, int secure ) { tracy::Profiler::MemAlloc( ptr, size, secure != 0 ); }
TRACY_API void ___tracy_emit_memory_alloc_callstack( const void* ptr, size_t size, int depth, int secure ) { tracy::Profiler::MemAllocCallstack( ptr, size, depth, secure != 0 ); }
TRACY_API void ___tracy_emit_memory_free( const void* ptr, int secure ) { tracy::Profiler::MemFree( ptr, secure != 0 ); }
TRACY_API void ___tracy_emit_memory_free_callstack( const void* ptr, int depth, int secure ) { tracy::Profiler::MemFreeCallstack( ptr, depth, secure != 0 ); }
TRACY_API void ___tracy_emit_frame_mark( const char* name ) { tracy::Profiler::SendFrameMark( name ); }
TRACY_API void ___tracy_emit_frame_mark_start( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgStart ); }
TRACY_API void ___tracy_emit_frame_mark_end( const char* name ) { tracy::Profiler::SendFrameMark( name, tracy::QueueType::FrameMarkMsgEnd ); }
@ -3065,8 +3157,27 @@ TRACY_API void ___tracy_emit_messageL( const char* txt, int callstack ) { tracy:
TRACY_API void ___tracy_emit_messageC( const char* txt, size_t size, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, size, color, callstack ); }
TRACY_API void ___tracy_emit_messageLC( const char* txt, uint32_t color, int callstack ) { tracy::Profiler::MessageColor( txt, color, callstack ); }
TRACY_API void ___tracy_emit_message_appinfo( const char* txt, size_t size ) { tracy::Profiler::MessageAppInfo( txt, size ); }
TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, const char* function ) { return tracy::Profiler::AllocSourceLocation( line, source, function ); }
TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz ) { return tracy::Profiler::AllocSourceLocation( line, source, function, name, nameSz ); }
// C API: forwards every argument, unchanged, to
// tracy::Profiler::AllocSourceLocation and returns its result.
// NOTE(review): sourceSz / functionSz are presumably the byte lengths of
// `source` and `function` - confirm against AllocSourceLocation.
TRACY_API uint64_t ___tracy_alloc_srcloc( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz )
{
    return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz );
}
// C API: forwards every argument, unchanged, to the seven-argument
// tracy::Profiler::AllocSourceLocation overload and returns its result.
// NOTE(review): sourceSz / functionSz / nameSz are presumably the byte lengths
// of `source`, `function` and `name` - confirm against AllocSourceLocation.
TRACY_API uint64_t ___tracy_alloc_srcloc_name( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz )
{
    return tracy::Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
}
// thread_locals are not initialized on thread creation, at least on GNU/Linux. Instead they are
// initialized on their first ODR-use. This means that the allocator is not automatically
// initialized every time a thread is created. Therefore, expose a simple function to C API users
// that they call every time they create a thread. Any other per-thread initialization can then
// be placed here as well.
//
// With TRACY_DELAYED_INIT defined the function calls GetProfilerThreadData(); otherwise it names
// the s_rpmalloc_thread_init variable. In both branches the result is discarded.
TRACY_API void ___tracy_init_thread(void) {
#ifdef TRACY_DELAYED_INIT
(void)tracy::GetProfilerThreadData();
#else
(void)tracy::s_rpmalloc_thread_init;
#endif
}
#ifdef __cplusplus
}

View File

@ -5,6 +5,7 @@
#include <atomic>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include "tracy_concurrentqueue.h"
#include "TracyCallstack.hpp"
@ -24,11 +25,11 @@
# include <mach/mach_time.h>
#endif
#if defined _WIN32 || defined __CYGWIN__ || ( ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) && !defined __ANDROID__ ) || __ARM_ARCH >= 6
#if defined _WIN32 || defined __CYGWIN__ || ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 ) || ( defined TARGET_OS_IOS && TARGET_OS_IOS == 1 )
# define TRACY_HW_TIMER
#endif
#if !defined TRACY_HW_TIMER || ( defined __ARM_ARCH && __ARM_ARCH >= 6 && !defined CLOCK_MONOTONIC_RAW )
#if !defined TRACY_HW_TIMER
#include <chrono>
#endif
@ -41,6 +42,10 @@
namespace tracy
{
// Declared only when both TRACY_DELAYED_INIT and TRACY_MANUAL_LIFETIME are defined.
// NOTE(review): presumably the application calls these to bring the profiler up and
// tear it down explicitly in that configuration; the definitions are not visible
// here - confirm.
#if defined(TRACY_DELAYED_INIT) && defined(TRACY_MANUAL_LIFETIME)
void StartupProfiler();
void ShutdownProfiler();
#endif
class GpuCtx;
class Profiler;
@ -59,6 +64,8 @@ TRACY_API std::atomic<uint8_t>& GetGpuCtxCounter();
TRACY_API GpuCtxWrapper& GetGpuCtx();
TRACY_API uint64_t GetThreadHandle();
TRACY_API void InitRPMallocThread();
TRACY_API bool ProfilerAvailable();
TRACY_API int64_t GetFrequencyQpc();
struct SourceLocationData
{
@ -106,7 +113,7 @@ class Profiler
struct FrameImageQueueItem
{
void* image;
uint64_t frame;
uint32_t frame;
uint16_t w;
uint16_t h;
uint8_t offset;
@ -117,19 +124,13 @@ public:
Profiler();
~Profiler();
void SpawnWorkerThreads();
static tracy_force_inline int64_t GetTime()
{
#ifdef TRACY_HW_TIMER
# if defined TARGET_OS_IOS && TARGET_OS_IOS == 1
return mach_absolute_time();
# elif defined __ARM_ARCH && __ARM_ARCH >= 6
# ifdef CLOCK_MONOTONIC_RAW
struct timespec ts;
clock_gettime( CLOCK_MONOTONIC_RAW, &ts );
return int64_t( ts.tv_sec ) * 1000000000ll + int64_t( ts.tv_nsec );
# else
return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
# endif
# elif defined _WIN32 || defined __CYGWIN__
# ifdef TRACY_TIMER_QPC
return GetTimeQpc();
@ -144,9 +145,17 @@ public:
uint64_t rax, rdx;
asm volatile ( "rdtsc" : "=a" (rax), "=d" (rdx) );
return ( rdx << 32 ) + rax;
# else
# error "TRACY_HW_TIMER detection logic needs fixing"
# endif
#else
# if defined __linux__ && defined CLOCK_MONOTONIC_RAW
struct timespec ts;
clock_gettime( CLOCK_MONOTONIC_RAW, &ts );
return int64_t( ts.tv_sec ) * 1000000000ll + int64_t( ts.tv_nsec );
# else
return std::chrono::duration_cast<std::chrono::nanoseconds>( std::chrono::high_resolution_clock::now().time_since_epoch() ).count();
# endif
#endif
}
@ -197,6 +206,7 @@ public:
static tracy_force_inline void SendFrameImage( const void* image, uint16_t w, uint16_t h, uint8_t offset, bool flip )
{
auto& profiler = GetProfiler();
assert( profiler.m_frameCount.load( std::memory_order_relaxed ) < std::numeric_limits<uint32_t>::max() );
#ifdef TRACY_ON_DEMAND
if( !profiler.IsConnected() ) return;
#endif
@ -207,7 +217,7 @@ public:
profiler.m_fiLock.lock();
auto fi = profiler.m_fiQueue.prepare_next();
fi->image = ptr;
fi->frame = profiler.m_frameCount.load( std::memory_order_relaxed ) - offset;
fi->frame = uint32_t( profiler.m_frameCount.load( std::memory_order_relaxed ) - offset );
fi->w = w;
fi->h = h;
fi->flip = flip;
@ -269,18 +279,19 @@ public:
static tracy_force_inline void Message( const char* txt, size_t size, int callstack )
{
assert( size < std::numeric_limits<uint16_t>::max() );
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
TracyLfqPrepare( callstack == 0 ? QueueType::Message : QueueType::MessageCallstack );
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
ptr[size] = '\0';
MemWrite( &item->message.time, GetTime() );
MemWrite( &item->message.text, (uint64_t)ptr );
TracyLfqCommit;
if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack );
TracyLfqPrepare( callstack == 0 ? QueueType::Message : QueueType::MessageCallstack );
auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, txt, size );
MemWrite( &item->messageFat.time, GetTime() );
MemWrite( &item->messageFat.text, (uint64_t)ptr );
MemWrite( &item->messageFat.size, (uint16_t)size );
TracyLfqCommit;
}
static tracy_force_inline void Message( const char* txt, int callstack )
@ -288,31 +299,32 @@ public:
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
TracyLfqPrepare( callstack == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack );
MemWrite( &item->message.time, GetTime() );
MemWrite( &item->message.text, (uint64_t)txt );
TracyLfqCommit;
if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack );
TracyLfqPrepare( callstack == 0 ? QueueType::MessageLiteral : QueueType::MessageLiteralCallstack );
MemWrite( &item->messageLiteral.time, GetTime() );
MemWrite( &item->messageLiteral.text, (uint64_t)txt );
TracyLfqCommit;
}
static tracy_force_inline void MessageColor( const char* txt, size_t size, uint32_t color, int callstack )
{
assert( size < std::numeric_limits<uint16_t>::max() );
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
TracyLfqPrepare( callstack == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack );
auto ptr = (char*)tracy_malloc( size+1 );
memcpy( ptr, txt, size );
ptr[size] = '\0';
MemWrite( &item->messageColor.time, GetTime() );
MemWrite( &item->messageColor.text, (uint64_t)ptr );
MemWrite( &item->messageColor.r, uint8_t( ( color ) & 0xFF ) );
MemWrite( &item->messageColor.g, uint8_t( ( color >> 8 ) & 0xFF ) );
MemWrite( &item->messageColor.b, uint8_t( ( color >> 16 ) & 0xFF ) );
TracyLfqCommit;
if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack );
TracyLfqPrepare( callstack == 0 ? QueueType::MessageColor : QueueType::MessageColorCallstack );
auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, txt, size );
MemWrite( &item->messageColorFat.time, GetTime() );
MemWrite( &item->messageColorFat.text, (uint64_t)ptr );
MemWrite( &item->messageColorFat.r, uint8_t( ( color ) & 0xFF ) );
MemWrite( &item->messageColorFat.g, uint8_t( ( color >> 8 ) & 0xFF ) );
MemWrite( &item->messageColorFat.b, uint8_t( ( color >> 16 ) & 0xFF ) );
MemWrite( &item->messageColorFat.size, (uint16_t)size );
TracyLfqCommit;
}
static tracy_force_inline void MessageColor( const char* txt, uint32_t color, int callstack )
@ -320,26 +332,27 @@ public:
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
TracyLfqPrepare( callstack == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack );
MemWrite( &item->messageColor.time, GetTime() );
MemWrite( &item->messageColor.text, (uint64_t)txt );
MemWrite( &item->messageColor.r, uint8_t( ( color ) & 0xFF ) );
MemWrite( &item->messageColor.g, uint8_t( ( color >> 8 ) & 0xFF ) );
MemWrite( &item->messageColor.b, uint8_t( ( color >> 16 ) & 0xFF ) );
TracyLfqCommit;
if( callstack != 0 ) tracy::GetProfiler().SendCallstack( callstack );
TracyLfqPrepare( callstack == 0 ? QueueType::MessageLiteralColor : QueueType::MessageLiteralColorCallstack );
MemWrite( &item->messageColorLiteral.time, GetTime() );
MemWrite( &item->messageColorLiteral.text, (uint64_t)txt );
MemWrite( &item->messageColorLiteral.r, uint8_t( ( color ) & 0xFF ) );
MemWrite( &item->messageColorLiteral.g, uint8_t( ( color >> 8 ) & 0xFF ) );
MemWrite( &item->messageColorLiteral.b, uint8_t( ( color >> 16 ) & 0xFF ) );
TracyLfqCommit;
}
static tracy_force_inline void MessageAppInfo( const char* txt, size_t size )
{
assert( size < std::numeric_limits<uint16_t>::max() );
InitRPMallocThread();
auto ptr = (char*)tracy_malloc( size+1 );
auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, txt, size );
ptr[size] = '\0';
TracyLfqPrepare( QueueType::MessageAppInfo );
MemWrite( &item->message.time, GetTime() );
MemWrite( &item->message.text, (uint64_t)ptr );
MemWrite( &item->messageFat.time, GetTime() );
MemWrite( &item->messageFat.text, (uint64_t)ptr );
MemWrite( &item->messageFat.size, (uint16_t)size );
#ifdef TRACY_ON_DEMAND
GetProfiler().DeferItem( *item );
@ -348,8 +361,9 @@ public:
TracyLfqCommit;
}
static tracy_force_inline void MemAlloc( const void* ptr, size_t size )
static tracy_force_inline void MemAlloc( const void* ptr, size_t size, bool secure )
{
if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
@ -360,8 +374,9 @@ public:
GetProfiler().m_serialLock.unlock();
}
static tracy_force_inline void MemFree( const void* ptr )
static tracy_force_inline void MemFree( const void* ptr, bool secure )
{
if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_ON_DEMAND
if( !GetProfiler().IsConnected() ) return;
#endif
@ -372,8 +387,9 @@ public:
GetProfiler().m_serialLock.unlock();
}
static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth )
static tracy_force_inline void MemAllocCallstack( const void* ptr, size_t size, int depth, bool secure )
{
if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_HAS_CALLSTACK
auto& profiler = GetProfiler();
# ifdef TRACY_ON_DEMAND
@ -385,16 +401,17 @@ public:
auto callstack = Callstack( depth );
profiler.m_serialLock.lock();
SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size );
SendCallstackMemory( callstack );
SendMemAlloc( QueueType::MemAllocCallstack, thread, ptr, size );
profiler.m_serialLock.unlock();
#else
MemAlloc( ptr, size );
MemAlloc( ptr, size, secure );
#endif
}
static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth )
static tracy_force_inline void MemFreeCallstack( const void* ptr, int depth, bool secure )
{
if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_HAS_CALLSTACK
auto& profiler = GetProfiler();
# ifdef TRACY_ON_DEMAND
@ -406,11 +423,85 @@ public:
auto callstack = Callstack( depth );
profiler.m_serialLock.lock();
SendMemFree( QueueType::MemFreeCallstack, thread, ptr );
SendCallstackMemory( callstack );
SendMemFree( QueueType::MemFreeCallstack, thread, ptr );
profiler.m_serialLock.unlock();
#else
MemFree( ptr );
MemFree( ptr, secure );
#endif
}
// Reports an allocation that belongs to the memory pool identified by `name`.
//
// ptr/size  - address and size of the allocation being reported.
// secure    - when true, the event is silently dropped if ProfilerAvailable() is false.
// name      - pool name; its pointer value is queued by SendMemName (the text is not copied here).
//
// The name payload and the allocation event are queued back to back under m_serialLock.
static tracy_force_inline void MemAllocNamed( const void* ptr, size_t size, bool secure, const char* name )
{
    if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_ON_DEMAND
    if( !GetProfiler().IsConnected() ) return;
#endif
    const auto threadHandle = GetThreadHandle();

    auto& profiler = GetProfiler();
    profiler.m_serialLock.lock();
    SendMemName( name );
    SendMemAlloc( QueueType::MemAllocNamed, threadHandle, ptr, size );
    profiler.m_serialLock.unlock();
}
// Reports that `ptr` was released from the memory pool identified by `name`.
//
// secure - when true, the event is silently dropped if ProfilerAvailable() is false.
// name   - pool name; its pointer value is queued by SendMemName (the text is not copied here).
//
// The name payload and the free event are queued back to back under m_serialLock.
static tracy_force_inline void MemFreeNamed( const void* ptr, bool secure, const char* name )
{
    if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_ON_DEMAND
    if( !GetProfiler().IsConnected() ) return;
#endif
    const auto threadHandle = GetThreadHandle();

    auto& profiler = GetProfiler();
    profiler.m_serialLock.lock();
    SendMemName( name );
    SendMemFree( QueueType::MemFreeNamed, threadHandle, ptr );
    profiler.m_serialLock.unlock();
}
// Reports an allocation in the memory pool `name`, together with a call stack of up to
// `depth` frames captured at the call site.
//
// secure - when true, the event is silently dropped if ProfilerAvailable() is false.
//
// With TRACY_HAS_CALLSTACK the call stack, the pool name and the allocation event are queued
// in that order under m_serialLock. Without call stack support the event degrades to a plain
// *named* allocation, so it still lands in the same pool as the matching named free.
static tracy_force_inline void MemAllocCallstackNamed( const void* ptr, size_t size, int depth, bool secure, const char* name )
{
    if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_HAS_CALLSTACK
    auto& profiler = GetProfiler();
#  ifdef TRACY_ON_DEMAND
    if( !profiler.IsConnected() ) return;
#  endif
    const auto thread = GetThreadHandle();

    InitRPMallocThread();
    auto callstack = Callstack( depth );

    profiler.m_serialLock.lock();
    SendCallstackMemory( callstack );
    SendMemName( name );
    SendMemAlloc( QueueType::MemAllocCallstackNamed, thread, ptr, size );
    profiler.m_serialLock.unlock();
#else
    // Keep the pool name: falling back to the unnamed MemAlloc would file this allocation
    // under the default pool and lose the association with `name`.
    static_cast<void>( depth );
    MemAllocNamed( ptr, size, secure, name );
#endif
}
// Reports that `ptr` was released from the memory pool `name`, together with a call stack of
// up to `depth` frames captured at the call site.
//
// secure - when true, the event is silently dropped if ProfilerAvailable() is false.
//
// With TRACY_HAS_CALLSTACK the call stack, the pool name and the free event are queued in that
// order under m_serialLock. Without call stack support the event degrades to a plain *named*
// free, so it is still attributed to the pool given by `name`.
static tracy_force_inline void MemFreeCallstackNamed( const void* ptr, int depth, bool secure, const char* name )
{
    if( secure && !ProfilerAvailable() ) return;
#ifdef TRACY_HAS_CALLSTACK
    auto& profiler = GetProfiler();
#  ifdef TRACY_ON_DEMAND
    if( !profiler.IsConnected() ) return;
#  endif
    const auto thread = GetThreadHandle();

    InitRPMallocThread();
    auto callstack = Callstack( depth );

    profiler.m_serialLock.lock();
    SendCallstackMemory( callstack );
    SendMemName( name );
    SendMemFree( QueueType::MemFreeCallstackNamed, thread, ptr );
    profiler.m_serialLock.unlock();
#else
    // Keep the pool name: falling back to the unnamed MemFree would report the free against
    // the default pool instead of the pool given by `name`.
    static_cast<void>( depth );
    MemFreeNamed( ptr, secure, name );
#endif
}
@ -419,7 +510,7 @@ public:
#ifdef TRACY_HAS_CALLSTACK
auto ptr = Callstack( depth );
TracyLfqPrepare( QueueType::Callstack );
MemWrite( &item->callstack.ptr, (uint64_t)ptr );
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
TracyLfqCommit;
#endif
}
@ -445,12 +536,12 @@ public:
static bool ShouldExit();
#ifdef TRACY_ON_DEMAND
tracy_force_inline bool IsConnected() const
{
return m_isConnected.load( std::memory_order_acquire );
}
#ifdef TRACY_ON_DEMAND
tracy_force_inline uint64_t ConnectionId() const
{
return m_connectionId.load( std::memory_order_acquire );
@ -468,11 +559,16 @@ public:
void RequestShutdown() { m_shutdown.store( true, std::memory_order_relaxed ); m_shutdownManual.store( true, std::memory_order_relaxed ); }
bool HasShutdownFinished() const { return m_shutdownFinished.load( std::memory_order_relaxed ); }
void SendString( uint64_t ptr, const char* str, QueueType type );
void SendString( uint64_t str, const char* ptr, QueueType type ) { SendString( str, ptr, strlen( ptr ), type ); }
void SendString( uint64_t str, const char* ptr, size_t len, QueueType type );
void SendSingleString( const char* ptr ) { SendSingleString( ptr, strlen( ptr ) ); }
void SendSingleString( const char* ptr, size_t len );
void SendSecondString( const char* ptr ) { SendSecondString( ptr, strlen( ptr ) ); }
void SendSecondString( const char* ptr, size_t len );
// Allocated source location data layout:
// 4b payload size
// 2b payload size
// 4b color
// 4b source line
// fsz function name
@ -483,30 +579,36 @@ public:
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function )
{
const auto fsz = strlen( function );
const auto ssz = strlen( source );
const uint32_t sz = uint32_t( 4 + 4 + 4 + fsz + 1 + ssz + 1 );
auto ptr = (char*)tracy_malloc( sz );
memcpy( ptr, &sz, 4 );
memset( ptr + 4, 0, 4 );
memcpy( ptr + 8, &line, 4 );
memcpy( ptr + 12, function, fsz+1 );
memcpy( ptr + 12 + fsz + 1, source, ssz + 1 );
return uint64_t( ptr );
return AllocSourceLocation( line, source, function, nullptr, 0 );
}
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, const char* function, const char* name, size_t nameSz )
{
const auto fsz = strlen( function );
const auto ssz = strlen( source );
const uint32_t sz = uint32_t( 4 + 4 + 4 + fsz + 1 + ssz + 1 + nameSz );
return AllocSourceLocation( line, source, strlen(source), function, strlen(function), name, nameSz );
}
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz )
{
return AllocSourceLocation( line, source, sourceSz, function, functionSz, nullptr, 0 );
}
static tracy_force_inline uint64_t AllocSourceLocation( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz )
{
const auto sz32 = uint32_t( 2 + 4 + 4 + functionSz + 1 + sourceSz + 1 + nameSz );
assert( sz32 <= std::numeric_limits<uint16_t>::max() );
const auto sz = uint16_t( sz32 );
auto ptr = (char*)tracy_malloc( sz );
memcpy( ptr, &sz, 4 );
memset( ptr + 4, 0, 4 );
memcpy( ptr + 8, &line, 4 );
memcpy( ptr + 12, function, fsz+1 );
memcpy( ptr + 12 + fsz + 1, source, ssz + 1 );
memcpy( ptr + 12 + fsz + 1 + ssz + 1, name, nameSz );
memcpy( ptr, &sz, 2 );
memset( ptr + 2, 0, 4 );
memcpy( ptr + 6, &line, 4 );
memcpy( ptr + 10, function, functionSz );
ptr[10 + functionSz] = '\0';
memcpy( ptr + 10 + functionSz + 1, source, sourceSz );
ptr[10 + functionSz + 1 + sourceSz] = '\0';
if( nameSz != 0 )
{
memcpy( ptr + 10 + functionSz + 1 + sourceSz + 1, name, nameSz );
}
return uint64_t( ptr );
}
@ -575,14 +677,14 @@ private:
#ifdef TRACY_HAS_CALLSTACK
auto item = GetProfiler().m_serialQueue.prepare_next();
MemWrite( &item->hdr.type, QueueType::CallstackMemory );
MemWrite( &item->callstackMemory.ptr, (uint64_t)ptr );
MemWrite( &item->callstackFat.ptr, (uint64_t)ptr );
GetProfiler().m_serialQueue.commit_next();
#endif
}
static tracy_force_inline void SendMemAlloc( QueueType type, const uint64_t thread, const void* ptr, size_t size )
{
assert( type == QueueType::MemAlloc || type == QueueType::MemAllocCallstack );
assert( type == QueueType::MemAlloc || type == QueueType::MemAllocCallstack || type == QueueType::MemAllocNamed || type == QueueType::MemAllocCallstackNamed );
auto item = GetProfiler().m_serialQueue.prepare_next();
MemWrite( &item->hdr.type, type );
@ -605,7 +707,7 @@ private:
static tracy_force_inline void SendMemFree( QueueType type, const uint64_t thread, const void* ptr )
{
assert( type == QueueType::MemFree || type == QueueType::MemFreeCallstack );
assert( type == QueueType::MemFree || type == QueueType::MemFreeCallstack || type == QueueType::MemFreeNamed || type == QueueType::MemFreeCallstackNamed );
auto item = GetProfiler().m_serialQueue.prepare_next();
MemWrite( &item->hdr.type, type );
@ -615,6 +717,15 @@ private:
GetProfiler().m_serialQueue.commit_next();
}
// Appends a MemNamePayload item carrying the pool name to the serial queue.
// Only the pointer value of `name` is stored; the string itself is not copied here.
// The callers visible in this file invoke it while holding m_serialLock.
static tracy_force_inline void SendMemName( const char* name )
{
    assert( name );
    auto& serialQueue = GetProfiler().m_serialQueue;
    auto item = serialQueue.prepare_next();
    MemWrite( &item->hdr.type, QueueType::MemNamePayload );
    MemWrite( &item->memName.name, (uint64_t)name );
    serialQueue.commit_next();
}
#if ( defined _WIN32 || defined __CYGWIN__ ) && defined TRACY_TIMER_QPC
static int64_t GetTimeQpc();
#endif
@ -655,8 +766,8 @@ private:
TracyMutex m_fiLock;
std::atomic<uint64_t> m_frameCount;
#ifdef TRACY_ON_DEMAND
std::atomic<bool> m_isConnected;
#ifdef TRACY_ON_DEMAND
std::atomic<uint64_t> m_connectionId;
TracyMutex m_deferredLock;

116
deps/tracy/client/TracyRingBuffer.hpp vendored Normal file
View File

@ -0,0 +1,116 @@
namespace tracy
{

// Reader for the memory-mapped ring buffer of a perf event file descriptor.
//
// The mapping consists of one metadata page (perf_event_mmap_page) followed by `Size` bytes
// of sample data. `Size` must be a power of two and at least one page large.
//
// The object owns both the mapping and the descriptor and releases them on destruction.
// It is movable but not copyable. A descriptor value of 0 is used internally to mean
// "no descriptor owned".
template<size_t Size>
class RingBuffer
{
    static_assert( Size != 0 && ( Size & ( Size - 1 ) ) == 0, "RingBuffer size must be a power of two" );

public:
    // Takes ownership of `fd` and maps its ring buffer. If the mapping fails the descriptor
    // is closed and the object is left in the invalid state (see IsValid()).
    RingBuffer( int fd )
        : m_metadata( nullptr )
        , m_buffer( nullptr )
        , m_mapSize( 0 )
        , m_fd( fd )
    {
        const auto pageSize = uint32_t( getpagesize() );
        assert( Size >= pageSize );

        m_mapSize = Size + pageSize;
        auto mapAddr = mmap( nullptr, m_mapSize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 );
        // mmap reports failure with MAP_FAILED, not with a null pointer.
        if( mapAddr == MAP_FAILED )
        {
            m_fd = 0;
            close( fd );
            return;
        }
        m_metadata = (perf_event_mmap_page*)mapAddr;
        assert( m_metadata->data_offset == pageSize );
        m_buffer = ((char*)mapAddr) + pageSize;
    }

    ~RingBuffer()
    {
        Release();
    }

    RingBuffer( const RingBuffer& ) = delete;
    RingBuffer& operator=( const RingBuffer& ) = delete;

    // Steals the mapping and descriptor from `other`, leaving it empty so that its
    // destructor releases nothing.
    RingBuffer( RingBuffer&& other ) noexcept
        : m_metadata( other.m_metadata )
        , m_buffer( other.m_buffer )
        , m_mapSize( other.m_mapSize )
        , m_fd( other.m_fd )
    {
        other.Reset();
    }

    // Releases whatever this object currently owns, then steals the state of `other`.
    RingBuffer& operator=( RingBuffer&& other ) noexcept
    {
        if( this != &other )
        {
            Release();
            m_metadata = other.m_metadata;
            m_buffer = other.m_buffer;
            m_mapSize = other.m_mapSize;
            m_fd = other.m_fd;
            other.Reset();
        }
        return *this;
    }

    // True when the ring buffer was mapped successfully and is still owned by this object.
    bool IsValid() const { return m_metadata != nullptr; }

    // Enables the perf event behind the descriptor.
    void Enable()
    {
        ioctl( m_fd, PERF_EVENT_IOC_ENABLE, 0 );
    }

    // True when the producer head is ahead of the consumer tail.
    bool HasData() const
    {
        const auto head = LoadHead();
        return head > m_metadata->data_tail;
    }

    // Copies `cnt` bytes located `offset` bytes past the current tail into `dst`,
    // handling wrap-around at the end of the data area. Does not move the tail.
    void Read( void* dst, uint64_t offset, uint64_t cnt )
    {
        auto src = ( m_metadata->data_tail + offset ) % Size;
        if( src + cnt <= Size )
        {
            memcpy( dst, m_buffer + src, cnt );
        }
        else
        {
            const auto s0 = Size - src;
            memcpy( dst, m_buffer + src, s0 );
            memcpy( (char*)dst + s0, m_buffer, cnt - s0 );
        }
    }

    // Marks `cnt` bytes as consumed by moving the tail forward.
    void Advance( uint64_t cnt )
    {
        StoreTail( m_metadata->data_tail + cnt );
    }

    // Reports the cap_user_time_zero capability bit, which ConvertTimeToTsc() requires.
    bool CheckTscCaps() const
    {
        return m_metadata->cap_user_time_zero;
    }

    // Converts a perf timestamp to a TSC value using time_zero, time_mult and time_shift
    // from the metadata page.
    int64_t ConvertTimeToTsc( int64_t timestamp ) const
    {
        assert( m_metadata->cap_user_time_zero );
        const auto time = timestamp - m_metadata->time_zero;
        const auto quot = time / m_metadata->time_mult;
        const auto rem = time % m_metadata->time_mult;
        return ( quot << m_metadata->time_shift ) + ( rem << m_metadata->time_shift ) / m_metadata->time_mult;
    }

private:
    // Unmaps the buffer and closes the descriptor if they are owned, then clears the members.
    void Release()
    {
        if( m_metadata ) munmap( m_metadata, m_mapSize );
        if( m_fd ) close( m_fd );
        Reset();
    }

    // Forgets the owned resources without releasing them.
    void Reset()
    {
        m_metadata = nullptr;
        m_buffer = nullptr;
        m_mapSize = 0;
        m_fd = 0;
    }

    // Acquire-load of data_head, pairing with the producer's writes to the data area.
    uint64_t LoadHead() const
    {
        return std::atomic_load_explicit( (const volatile std::atomic<uint64_t>*)&m_metadata->data_head, std::memory_order_acquire );
    }

    // Release-store of data_tail, publishing that the data before `tail` has been consumed.
    void StoreTail( uint64_t tail )
    {
        std::atomic_store_explicit( (volatile std::atomic<uint64_t>*)&m_metadata->data_tail, tail, std::memory_order_release );
    }

    perf_event_mmap_page* m_metadata;
    char* m_buffer;
    size_t m_mapSize;
    int m_fd;
};

}

View File

@ -1,6 +1,7 @@
#ifndef __TRACYSCOPED_HPP__
#define __TRACYSCOPED_HPP__
#include <limits>
#include <stdint.h>
#include <string.h>
@ -15,6 +16,11 @@ namespace tracy
class ScopedZone
{
public:
ScopedZone( const ScopedZone& ) = delete;
ScopedZone( ScopedZone&& ) = delete;
ScopedZone& operator=( const ScopedZone& ) = delete;
ScopedZone& operator=( ScopedZone&& ) = delete;
tracy_force_inline ScopedZone( const SourceLocationData* srcloc, bool is_active = true )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
@ -43,12 +49,50 @@ public:
#ifdef TRACY_ON_DEMAND
m_connectionId = GetProfiler().ConnectionId();
#endif
GetProfiler().SendCallstack( depth );
TracyLfqPrepare( QueueType::ZoneBeginCallstack );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, (uint64_t)srcloc );
TracyLfqCommit;
}
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, bool is_active = true )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
m_connectionId = GetProfiler().ConnectionId();
#endif
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLoc );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyLfqCommit;
}
tracy_force_inline ScopedZone( uint32_t line, const char* source, size_t sourceSz, const char* function, size_t functionSz, const char* name, size_t nameSz, int depth, bool is_active = true )
#ifdef TRACY_ON_DEMAND
: m_active( is_active && GetProfiler().IsConnected() )
#else
: m_active( is_active )
#endif
{
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
m_connectionId = GetProfiler().ConnectionId();
#endif
GetProfiler().SendCallstack( depth );
TracyLfqPrepare( QueueType::ZoneBeginAllocSrcLocCallstack );
const auto srcloc = Profiler::AllocSourceLocation( line, source, sourceSz, function, functionSz, name, nameSz );
MemWrite( &item->zoneBegin.time, Profiler::GetTime() );
MemWrite( &item->zoneBegin.srcloc, srcloc );
TracyLfqCommit;
}
tracy_force_inline ~ScopedZone()
@ -64,29 +108,31 @@ public:
tracy_force_inline void Text( const char* txt, size_t size )
{
assert( size < std::numeric_limits<uint16_t>::max() );
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
if( GetProfiler().ConnectionId() != m_connectionId ) return;
#endif
auto ptr = (char*)tracy_malloc( size+1 );
auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, txt, size );
ptr[size] = '\0';
TracyLfqPrepare( QueueType::ZoneText );
MemWrite( &item->zoneText.text, (uint64_t)ptr );
MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
TracyLfqCommit;
}
tracy_force_inline void Name( const char* txt, size_t size )
{
assert( size < std::numeric_limits<uint16_t>::max() );
if( !m_active ) return;
#ifdef TRACY_ON_DEMAND
if( GetProfiler().ConnectionId() != m_connectionId ) return;
#endif
auto ptr = (char*)tracy_malloc( size+1 );
auto ptr = (char*)tracy_malloc( size );
memcpy( ptr, txt, size );
ptr[size] = '\0';
TracyLfqPrepare( QueueType::ZoneName );
MemWrite( &item->zoneText.text, (uint64_t)ptr );
MemWrite( &item->zoneTextFat.text, (uint64_t)ptr );
MemWrite( &item->zoneTextFat.size, (uint16_t)size );
TracyLfqCommit;
}

View File

@ -21,23 +21,28 @@
# include "../common/TracyAlloc.hpp"
# include "../common/TracySystem.hpp"
# include "TracyProfiler.hpp"
# include "TracyThread.hpp"
namespace tracy
{
DEFINE_GUID ( /* ce1dbfb4-137e-4da6-87b0-3f59aa102cbc */
PerfInfoGuid,
0xce1dbfb4,
0x137e,
0x4da6,
0x87, 0xb0, 0x3f, 0x59, 0xaa, 0x10, 0x2c, 0xbc
);
struct __declspec(uuid("{ce1dbfb4-137e-4da6-87b0-3f59aa102cbc}")) PERFINFOGUID;
static const auto PerfInfoGuid = __uuidof(PERFINFOGUID);
struct __declspec(uuid("{802EC45A-1E99-4B83-9920-87C98277BA9D}")) DXGKRNLGUID;
static const auto DxgKrnlGuid = __uuidof(DXGKRNLGUID);
static TRACEHANDLE s_traceHandle;
static TRACEHANDLE s_traceHandle2;
static EVENT_TRACE_PROPERTIES* s_prop;
static DWORD s_pid;
static EVENT_TRACE_PROPERTIES* s_propVsync;
static TRACEHANDLE s_traceHandleVsync;
static TRACEHANDLE s_traceHandleVsync2;
Thread* s_threadVsync = nullptr;
struct CSwitch
{
uint32_t newThreadId;
@ -85,6 +90,19 @@ struct StackWalkEvent
uint64_t stack[192];
};
// Layout used to interpret EVENT_RECORD::UserData of the DxgKrnl event with id 0x0011
// (VSyncDPC_Info) in EventRecordCallbackVsync. Of these fields, only vidPnTargetId is
// read by the code visible in this file.
struct VSyncInfo
{
void* dxgAdapter;
uint32_t vidPnTargetId;
uint64_t scannedPhysicalAddress;
uint32_t vidPnSourceId;
uint32_t frameNumber;
int64_t frameQpcTime;
void* hFlipDevice;
uint32_t flipType;
uint64_t flipFenceId;
};
#ifdef __CYGWIN__
extern "C" typedef DWORD (WINAPI *t_GetProcessIdOfThread)( HANDLE );
extern "C" typedef DWORD (WINAPI *t_GetProcessImageFileNameA)( HANDLE, LPSTR, DWORD );
@ -168,9 +186,9 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )
memcpy( trace, &sz, sizeof( uint64_t ) );
memcpy( trace+1, sw->stack, sizeof( uint64_t ) * sz );
TracyLfqPrepare( QueueType::CallstackSample );
MemWrite( &item->callstackSample.time, sw->eventTimeStamp );
MemWrite( &item->callstackSample.thread, (uint64_t)sw->stackThread );
MemWrite( &item->callstackSample.ptr, (uint64_t)trace );
MemWrite( &item->callstackSampleFat.time, sw->eventTimeStamp );
MemWrite( &item->callstackSampleFat.thread, (uint64_t)sw->stackThread );
MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace );
TracyLfqCommit;
}
}
@ -181,6 +199,136 @@ void WINAPI EventRecordCallback( PEVENT_RECORD record )
}
}
// Frame names reported for Vsync events. Entries 0-7 correspond to the slots of VsyncTarget;
// the last entry (index 8) is the generic name used when no slot could be assigned.
static constexpr const char* VsyncName[] = {
"[0] Vsync",
"[1] Vsync",
"[2] Vsync",
"[3] Vsync",
"[4] Vsync",
"[5] Vsync",
"[6] Vsync",
"[7] Vsync",
"Vsync"
};
// vidPnTargetId remembered for each slot; 0 marks a slot that has not been assigned yet.
static uint32_t VsyncTarget[8] = {};
// ETW callback for the Vsync trace: turns each received event into a FrameMarkMsg queue item.
//
// The event's vidPnTargetId is mapped to one of eight slots in VsyncTarget (a slot holding 0
// is treated as free and claimed on first use). The slot index selects the frame name from
// VsyncName; when all eight slots hold other ids, index 8 (the generic "Vsync") is used.
void WINAPI EventRecordCallbackVsync( PEVENT_RECORD record )
{
#ifdef TRACY_ON_DEMAND
    if( !GetProfiler().IsConnected() ) return;
#endif

    const auto& hdr = record->EventHeader;
    assert( hdr.ProviderId.Data1 == 0x802EC45A );
    assert( hdr.EventDescriptor.Id == 0x0011 );

    const auto vs = (const VSyncInfo*)record->UserData;

    int idx = 0;
    for( ; idx < 8; idx++ )
    {
        if( VsyncTarget[idx] == 0 )
        {
            // Free slot: claim it for this target.
            VsyncTarget[idx] = vs->vidPnTargetId;
            break;
        }
        if( VsyncTarget[idx] == vs->vidPnTargetId ) break;
    }

    TracyLfqPrepare( QueueType::FrameMarkMsg );
    MemWrite( &item->frameMark.time, hdr.TimeStamp.QuadPart );
    MemWrite( &item->frameMark.name, uint64_t( VsyncName[idx] ) );
    TracyLfqCommit;
}
// Starts a real-time ETW session named "TracyVsync", enables the DxgKrnl provider on it
// (filtered to event id 0x0011) and spawns a thread that runs ProcessTrace, which delivers
// the events to EventRecordCallbackVsync.
//
// The whole body is compiled out unless _WIN32_WINNT >= _WIN32_WINNT_WINBLUE.
// On any failure the function frees s_propVsync and returns without creating s_threadVsync,
// which SysTraceStop uses to decide whether there is anything to tear down.
static void SetupVsync()
{
#if _WIN32_WINNT >= _WIN32_WINNT_WINBLUE
// The properties block is followed by MAX_PATH bytes that hold the logger name.
const auto psz = sizeof( EVENT_TRACE_PROPERTIES ) + MAX_PATH;
s_propVsync = (EVENT_TRACE_PROPERTIES*)tracy_malloc( psz );
memset( s_propVsync, 0, sizeof( EVENT_TRACE_PROPERTIES ) );
s_propVsync->LogFileMode = EVENT_TRACE_REAL_TIME_MODE;
s_propVsync->Wnode.BufferSize = psz;
// ClientContext selects the session's clock source.
// NOTE(review): 1 is presumably QPC and 3 the CPU cycle counter - confirm against the
// EVENT_TRACE_PROPERTIES documentation.
#ifdef TRACY_TIMER_QPC
s_propVsync->Wnode.ClientContext = 1;
#else
s_propVsync->Wnode.ClientContext = 3;
#endif
s_propVsync->LoggerNameOffset = sizeof( EVENT_TRACE_PROPERTIES );
strcpy( ((char*)s_propVsync) + sizeof( EVENT_TRACE_PROPERTIES ), "TracyVsync" );
// Keep a pristine copy of the properties; it is restored after the stop request below.
// NOTE(review): presumably because ControlTraceA writes into the structure - confirm.
auto backup = tracy_malloc( psz );
memcpy( backup, s_propVsync, psz );
// Stop a session with the same name if one exists; "instance not found" is not an error here.
const auto controlStatus = ControlTraceA( 0, "TracyVsync", s_propVsync, EVENT_TRACE_CONTROL_STOP );
if( controlStatus != ERROR_SUCCESS && controlStatus != ERROR_WMI_INSTANCE_NOT_FOUND )
{
tracy_free( backup );
tracy_free( s_propVsync );
return;
}
memcpy( s_propVsync, backup, psz );
tracy_free( backup );
const auto startStatus = StartTraceA( &s_traceHandleVsync, "TracyVsync", s_propVsync );
if( startStatus != ERROR_SUCCESS )
{
tracy_free( s_propVsync );
return;
}
// Event id filter: only let event 0x0011 through.
EVENT_FILTER_EVENT_ID fe = {};
fe.FilterIn = TRUE;
fe.Count = 1;
fe.Events[0] = 0x0011; // VSyncDPC_Info
EVENT_FILTER_DESCRIPTOR desc = {};
desc.Ptr = (ULONGLONG)&fe;
desc.Size = sizeof( fe );
desc.Type = EVENT_FILTER_TYPE_EVENT_ID;
ENABLE_TRACE_PARAMETERS params = {};
params.Version = ENABLE_TRACE_PARAMETERS_VERSION_2;
params.EnableProperty = EVENT_ENABLE_PROPERTY_IGNORE_KEYWORD_0;
params.SourceId = s_propVsync->Wnode.Guid;
params.EnableFilterDesc = &desc;
params.FilterDescCount = 1;
uint64_t mask = 0x4000000000000001; // Microsoft_Windows_DxgKrnl_Performance | Base
// NOTE(review): the result of EnableTraceEx2 is not checked.
EnableTraceEx2( s_traceHandleVsync, &DxgKrnlGuid, EVENT_CONTROL_CODE_ENABLE_PROVIDER, TRACE_LEVEL_INFORMATION, mask, mask, 0, &params );
char loggerName[MAX_PATH];
strcpy( loggerName, "TracyVsync" );
// Open the session for real-time consumption with raw timestamps.
EVENT_TRACE_LOGFILEA log = {};
log.LoggerName = loggerName;
log.ProcessTraceMode = PROCESS_TRACE_MODE_REAL_TIME | PROCESS_TRACE_MODE_EVENT_RECORD | PROCESS_TRACE_MODE_RAW_TIMESTAMP;
log.EventRecordCallback = EventRecordCallbackVsync;
s_traceHandleVsync2 = OpenTraceA( &log );
if( s_traceHandleVsync2 == (TRACEHANDLE)INVALID_HANDLE_VALUE )
{
CloseTrace( s_traceHandleVsync );
tracy_free( s_propVsync );
return;
}
// The Thread object is constructed in place in tracy_malloc'ed storage; SysTraceStop
// destroys it with an explicit destructor call followed by tracy_free.
s_threadVsync = (Thread*)tracy_malloc( sizeof( Thread ) );
new(s_threadVsync) Thread( [] (void*) {
ThreadExitHandler threadExitHandler;
SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL );
SetThreadName( "Tracy Vsync" );
ProcessTrace( &s_traceHandleVsync2, 1, nullptr, nullptr );
}, nullptr );
#endif
}
bool SysTraceStart( int64_t& samplingPeriod )
{
if( !_GetThreadDescription ) _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" );
@ -233,6 +381,8 @@ bool SysTraceStart( int64_t& samplingPeriod )
#endif
s_prop->Wnode.Guid = SystemTraceControlGuid;
s_prop->BufferSize = 1024;
s_prop->MinimumBuffers = std::thread::hardware_concurrency() * 4;
s_prop->MaximumBuffers = std::thread::hardware_concurrency() * 6;
s_prop->LoggerNameOffset = sizeof( EVENT_TRACE_PROPERTIES );
memcpy( ((char*)s_prop) + sizeof( EVENT_TRACE_PROPERTIES ), KERNEL_LOGGER_NAME, sizeof( KERNEL_LOGGER_NAME ) );
@ -242,6 +392,7 @@ bool SysTraceStart( int64_t& samplingPeriod )
const auto controlStatus = ControlTrace( 0, KERNEL_LOGGER_NAME, s_prop, EVENT_TRACE_CONTROL_STOP );
if( controlStatus != ERROR_SUCCESS && controlStatus != ERROR_WMI_INSTANCE_NOT_FOUND )
{
tracy_free( backup );
tracy_free( s_prop );
return false;
}
@ -288,17 +439,29 @@ bool SysTraceStart( int64_t& samplingPeriod )
return false;
}
SetupVsync();
return true;
}
// Stops system tracing. If SetupVsync created the Vsync thread, its trace handles are closed
// first and the in-place constructed Thread object is destroyed and its storage freed; then
// the handles of the main trace are closed.
void SysTraceStop()
{
if( s_threadVsync )
{
// Close the handles before destroying the thread object.
// NOTE(review): presumably this makes ProcessTrace return in the Vsync thread - confirm.
CloseTrace( s_traceHandleVsync2 );
CloseTrace( s_traceHandleVsync );
// Matches the placement-new construction in SetupVsync.
s_threadVsync->~Thread();
tracy_free( s_threadVsync );
}
CloseTrace( s_traceHandle2 );
CloseTrace( s_traceHandle );
}
void SysTraceWorker( void* ptr )
{
ThreadExitHandler threadExitHandler;
SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL );
SetThreadName( "Tracy SysTrace" );
ProcessTrace( &s_traceHandle2, 1, 0, 0 );
ControlTrace( 0, KERNEL_LOGGER_NAME, s_prop, EVENT_TRACE_CONTROL_STOP );
@ -323,7 +486,7 @@ void SysTraceSendExternalName( uint64_t thread )
auto ret = wcstombs( buf, tmp, 256 );
if( ret != 0 )
{
GetProfiler().SendString( thread, buf, QueueType::ExternalThreadName );
GetProfiler().SendString( thread, buf, ret, QueueType::ExternalThreadName );
threadSent = true;
}
}
@ -351,9 +514,10 @@ void SysTraceSendExternalName( uint64_t thread )
if( (uint64_t)ptr >= (uint64_t)info.lpBaseOfDll && (uint64_t)ptr <= (uint64_t)info.lpBaseOfDll + (uint64_t)info.SizeOfImage )
{
char buf2[1024];
if( _GetModuleBaseNameA( phnd, modules[i], buf2, 1024 ) != 0 )
const auto modlen = _GetModuleBaseNameA( phnd, modules[i], buf2, 1024 );
if( modlen != 0 )
{
GetProfiler().SendString( thread, buf2, QueueType::ExternalThreadName );
GetProfiler().SendString( thread, buf2, modlen, QueueType::ExternalThreadName );
threadSent = true;
}
}
@ -367,7 +531,7 @@ void SysTraceSendExternalName( uint64_t thread )
CloseHandle( hnd );
if( !threadSent )
{
GetProfiler().SendString( thread, "???", QueueType::ExternalThreadName );
GetProfiler().SendString( thread, "???", 3, QueueType::ExternalThreadName );
threadSent = true;
}
if( pid != 0 )
@ -381,7 +545,7 @@ void SysTraceSendExternalName( uint64_t thread )
}
if( pid == 4 )
{
GetProfiler().SendString( thread, "System", QueueType::ExternalName );
GetProfiler().SendString( thread, "System", 6, QueueType::ExternalName );
return;
}
else
@ -407,9 +571,9 @@ void SysTraceSendExternalName( uint64_t thread )
if( !threadSent )
{
GetProfiler().SendString( thread, "???", QueueType::ExternalThreadName );
GetProfiler().SendString( thread, "???", 3, QueueType::ExternalThreadName );
}
GetProfiler().SendString( thread, "???", QueueType::ExternalName );
GetProfiler().SendString( thread, "???", 3, QueueType::ExternalName );
}
}
@ -428,8 +592,15 @@ void SysTraceSendExternalName( uint64_t thread )
# include <string.h>
# include <unistd.h>
# include <atomic>
# include <thread>
# include <linux/perf_event.h>
# include <linux/version.h>
# include <sys/mman.h>
# include <sys/ioctl.h>
# include "TracyProfiler.hpp"
# include "TracyRingBuffer.hpp"
# include "TracyThread.hpp"
# ifdef __ANDROID__
# include "TracySysTracePayload.hpp"
@ -449,6 +620,173 @@ static const char BufferSizeKb[] = "buffer_size_kb";
static const char TracePipe[] = "trace_pipe";
static std::atomic<bool> traceActive { false };
static Thread* s_threadSampling = nullptr;
static int s_numCpus = 0;
static constexpr size_t RingBufSize = 64*1024;
static RingBuffer<RingBufSize>* s_ring = nullptr;
// Thin wrapper that issues the perf_event_open system call through syscall() and returns
// its result narrowed to int.
static int perf_event_open( struct perf_event_attr* hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags )
{
    const auto result = syscall( __NR_perf_event_open, hw_event, pid, cpu, group_fd, flags );
    return (int)result;
}
// Sets up call stack sampling through perf events and starts the thread that drains the samples.
//
// samplingPeriod - out: set to 100*1000 (presumably nanoseconds, matching the 10000 Hz
//                  sample_freq requested below - TODO confirm the unit with the caller).
//
// One CPU-clock software event is opened per CPU, each with its own RingBuffer. If any event
// cannot be opened, all ring buffers created so far are destroyed and no thread is started.
// The sampling thread polls the ring buffers while traceActive is set, converts samples that
// belong to this process into CallstackSample queue items, and releases the ring buffers when
// traceActive is cleared.
static void SetupSampling( int64_t& samplingPeriod )
{
#ifndef CLOCK_MONOTONIC_RAW
    return;
#endif

    samplingPeriod = 100*1000;

    s_numCpus = (int)std::thread::hardware_concurrency();
    s_ring = (RingBuffer<RingBufSize>*)tracy_malloc( sizeof( RingBuffer<RingBufSize> ) * s_numCpus );

    perf_event_attr pe = {};

    pe.type = PERF_TYPE_SOFTWARE;
    pe.size = sizeof( perf_event_attr );
    pe.config = PERF_COUNT_SW_CPU_CLOCK;

    pe.sample_freq = 10000;
    pe.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CALLCHAIN;
#if LINUX_VERSION_CODE >= KERNEL_VERSION( 4, 8, 0 )
    pe.sample_max_stack = 127;
#endif
    pe.exclude_callchain_kernel = 1;

    pe.disabled = 1;
    pe.freq = 1;
#if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
    pe.use_clockid = 1;
    pe.clockid = CLOCK_MONOTONIC_RAW;
#endif

    for( int i=0; i<s_numCpus; i++ )
    {
        const int fd = perf_event_open( &pe, -1, i, -1, 0 );
        if( fd == -1 )
        {
            for( int j=0; j<i; j++ ) s_ring[j].~RingBuffer<RingBufSize>();
            tracy_free( s_ring );
            return;
        }
        new( s_ring+i ) RingBuffer<RingBufSize>( fd );
    }

    s_threadSampling = (Thread*)tracy_malloc( sizeof( Thread ) );
    new(s_threadSampling) Thread( [] (void*) {
        ThreadExitHandler threadExitHandler;
        SetThreadName( "Tracy Sampling" );
        sched_param sp = { 5 };
        pthread_setschedparam( pthread_self(), SCHED_FIFO, &sp );
        uint32_t currentPid = (uint32_t)getpid();
#if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
        // Timestamps are converted to TSC below, which needs cap_user_time_zero on every ring.
        for( int i=0; i<s_numCpus; i++ )
        {
            if( !s_ring[i].CheckTscCaps() )
            {
                for( int j=0; j<s_numCpus; j++ ) s_ring[j].~RingBuffer<RingBufSize>();
                tracy_free( s_ring );
                const char* err = "Tracy Profiler: sampling is disabled due to non-native scheduler clock. Are you running under a VM?";
                Profiler::MessageAppInfo( err, strlen( err ) );
                return;
            }
        }
#endif
        for( int i=0; i<s_numCpus; i++ ) s_ring[i].Enable();
        for(;;)
        {
            bool hadData = false;
            for( int i=0; i<s_numCpus; i++ )
            {
                if( !traceActive.load( std::memory_order_relaxed ) ) break;
                if( !s_ring[i].HasData() ) continue;
                hadData = true;

                perf_event_header hdr;
                s_ring[i].Read( &hdr, 0, sizeof( perf_event_header ) );
                if( hdr.type == PERF_RECORD_SAMPLE )
                {
                    // Record body as read below: pid, tid, time, frame count, frames.
                    uint32_t pid, tid;
                    uint64_t t0;
                    uint64_t cnt;

                    auto offset = sizeof( perf_event_header );
                    s_ring[i].Read( &pid, offset, sizeof( uint32_t ) );
                    if( pid == currentPid )
                    {
                        offset += sizeof( uint32_t );
                        s_ring[i].Read( &tid, offset, sizeof( uint32_t ) );
                        offset += sizeof( uint32_t );
                        s_ring[i].Read( &t0, offset, sizeof( uint64_t ) );
                        offset += sizeof( uint64_t );
                        s_ring[i].Read( &cnt, offset, sizeof( uint64_t ) );
                        offset += sizeof( uint64_t );

                        // trace[0] is reserved for the frame count, frames live in trace[1..cnt].
                        auto trace = (uint64_t*)tracy_malloc( ( 1 + cnt ) * sizeof( uint64_t ) );
                        s_ring[i].Read( trace+1, offset, sizeof( uint64_t ) * cnt );

                        // remove non-canonical pointers
                        // Trailing entries are dropped. Testing cnt first keeps a sample with
                        // no frames from reading trace[0] and from wrapping cnt around.
                        while( cnt > 0 )
                        {
                            const auto test = (int64_t)trace[cnt];
                            const auto m1 = test >> 63;
                            const auto m2 = test >> 47;
                            if( m1 == m2 ) break;
                            cnt--;
                        }
                        // Remaining non-canonical entries in the middle are zeroed in place.
                        for( uint64_t j=1; j<cnt; j++ )
                        {
                            const auto test = (int64_t)trace[j];
                            const auto m1 = test >> 63;
                            const auto m2 = test >> 47;
                            if( m1 != m2 ) trace[j] = 0;
                        }

                        // skip kernel frames
                        uint64_t j;
                        for( j=0; j<cnt; j++ )
                        {
                            if( (int64_t)trace[j+1] >= 0 ) break;
                        }
                        if( j == cnt )
                        {
                            // Nothing usable is left in this sample.
                            tracy_free( trace );
                        }
                        else
                        {
                            if( j > 0 )
                            {
                                cnt -= j;
                                memmove( trace+1, trace+1+j, sizeof( uint64_t ) * cnt );
                            }
                            memcpy( trace, &cnt, sizeof( uint64_t ) );

#if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
                            t0 = s_ring[i].ConvertTimeToTsc( t0 );
#endif

                            TracyLfqPrepare( QueueType::CallstackSample );
                            MemWrite( &item->callstackSampleFat.time, t0 );
                            MemWrite( &item->callstackSampleFat.thread, (uint64_t)tid );
                            MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace );
                            TracyLfqCommit;
                        }
                    }
                }
                s_ring[i].Advance( hdr.size );
            }
            // The break above only leaves the per-CPU loop; without this check the thread
            // would keep polling forever and the cleanup below would never run.
            if( !traceActive.load( std::memory_order_relaxed ) ) break;
            if( !hadData )
            {
                std::this_thread::sleep_for( std::chrono::milliseconds( 10 ) );
            }
        }

        for( int i=0; i<s_numCpus; i++ ) s_ring[i].~RingBuffer<RingBufSize>();
        tracy_free( s_ring );
    }, nullptr );
}
#ifdef __ANDROID__
static bool TraceWrite( const char* path, size_t psz, const char* val, size_t vsz )
@ -525,6 +863,10 @@ void SysTraceInjectPayload()
bool SysTraceStart( int64_t& samplingPeriod )
{
#ifndef CLOCK_MONOTONIC_RAW
return false;
#endif
if( !TraceWrite( TracingOn, sizeof( TracingOn ), "0", 2 ) ) return false;
if( !TraceWrite( CurrentTracer, sizeof( CurrentTracer ), "nop", 4 ) ) return false;
TraceWrite( TraceOptions, sizeof( TraceOptions ), "norecord-cmd", 13 );
@ -533,12 +875,12 @@ bool SysTraceStart( int64_t& samplingPeriod )
TraceWrite( TraceOptions, sizeof( TraceOptions ), "noannotate", 11 );
#if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
if( !TraceWrite( TraceClock, sizeof( TraceClock ), "x86-tsc", 8 ) ) return false;
#elif __ARM_ARCH >= 6
#else
if( !TraceWrite( TraceClock, sizeof( TraceClock ), "mono_raw", 9 ) ) return false;
#endif
if( !TraceWrite( SchedSwitch, sizeof( SchedSwitch ), "1", 2 ) ) return false;
if( !TraceWrite( SchedWakeup, sizeof( SchedWakeup ), "1", 2 ) ) return false;
if( !TraceWrite( BufferSizeKb, sizeof( BufferSizeKb ), "512", 4 ) ) return false;
if( !TraceWrite( BufferSizeKb, sizeof( BufferSizeKb ), "4096", 5 ) ) return false;
#if defined __ANDROID__ && ( defined __aarch64__ || defined __ARM_ARCH )
SysTraceInjectPayload();
@ -547,6 +889,8 @@ bool SysTraceStart( int64_t& samplingPeriod )
if( !TraceWrite( TracingOn, sizeof( TracingOn ), "1", 2 ) ) return false;
traceActive.store( true, std::memory_order_relaxed );
SetupSampling( samplingPeriod );
return true;
}
@ -554,23 +898,27 @@ void SysTraceStop()
{
TraceWrite( TracingOn, sizeof( TracingOn ), "0", 2 );
traceActive.store( false, std::memory_order_relaxed );
if( s_threadSampling )
{
s_threadSampling->~Thread();
tracy_free( s_threadSampling );
}
}
static uint64_t ReadNumber( const char*& ptr )
static uint64_t ReadNumber( const char*& data )
{
uint64_t val = 0;
auto ptr = data;
assert( *ptr >= '0' && *ptr <= '9' );
uint64_t val = *ptr++ - '0';
for(;;)
{
if( *ptr >= '0' && *ptr <= '9' )
{
val = val * 10 + ( *ptr - '0' );
ptr++;
}
else
{
return val;
}
const uint8_t v = uint8_t( *ptr - '0' );
if( v > 9 ) break;
val = val * 10 + v;
ptr++;
}
data = ptr;
return val;
}
static uint8_t ReadState( char state )
@ -674,7 +1022,7 @@ static void HandleTraceLine( const char* line )
#if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
const auto time = ReadNumber( line );
#elif __ARM_ARCH >= 6
#else
const auto ts = ReadNumber( line );
line++; // '.'
const auto tus = ReadNumber( line );
@ -774,19 +1122,16 @@ static void ProcessTraceLines( int fd )
line = buf;
for(;;)
{
auto next = line;
while( next < end && *next != '\n' ) next++;
next++;
if( next >= end )
auto next = (char*)memchr( line, '\n', end - line );
if( !next )
{
const auto lsz = end - line;
memmove( buf, line, lsz );
line = buf + lsz;
break;
}
HandleTraceLine( line );
line = next;
line = ++next;
}
if( rd < 64*1024 )
{
@ -799,6 +1144,7 @@ static void ProcessTraceLines( int fd )
void SysTraceWorker( void* ptr )
{
ThreadExitHandler threadExitHandler;
SetThreadName( "Tracy SysTrace" );
int pipefd[2];
if( pipe( pipefd ) == 0 )
@ -812,6 +1158,8 @@ void SysTraceWorker( void* ptr )
if( dup2( pipefd[1], STDOUT_FILENO ) >= 0 )
{
close( pipefd[1] );
sched_param sp = { 4 };
pthread_setschedparam( pthread_self(), SCHED_FIFO, &sp );
#if defined __ANDROID__ && ( defined __aarch64__ || defined __ARM_ARCH )
execlp( "su", "su", "-c", "/data/tracy_systrace", (char*)nullptr );
#endif
@ -823,6 +1171,8 @@ void SysTraceWorker( void* ptr )
{
// parent
close( pipefd[1] );
sched_param sp = { 5 };
pthread_setschedparam( pthread_self(), SCHED_FIFO, &sp );
ProcessTraceLines( pipefd[0] );
close( pipefd[0] );
}
@ -856,14 +1206,10 @@ static void ProcessTraceLines( int fd )
const auto end = buf + rd;
for(;;)
{
auto next = line;
while( next < end && *next != '\n' ) next++;
if( next == end ) break;
assert( *next == '\n' );
next++;
auto next = (char*)memchr( line, '\n', end - line );
if( !next ) break;
HandleTraceLine( line );
line = next;
line = ++next;
}
}
@ -872,6 +1218,7 @@ static void ProcessTraceLines( int fd )
void SysTraceWorker( void* ptr )
{
ThreadExitHandler threadExitHandler;
SetThreadName( "Tracy SysTrace" );
char tmp[256];
memcpy( tmp, BasePath, sizeof( BasePath ) - 1 );
@ -879,6 +1226,8 @@ void SysTraceWorker( void* ptr )
int fd = open( tmp, O_RDONLY );
if( fd < 0 ) return;
sched_param sp = { 5 };
pthread_setschedparam( pthread_self(), SCHED_FIFO, &sp );
ProcessTraceLines( fd );
close( fd );
}
@ -900,7 +1249,7 @@ void SysTraceSendExternalName( uint64_t thread )
}
else
{
GetProfiler().SendString( thread, "???", QueueType::ExternalThreadName );
GetProfiler().SendString( thread, "???", 3, QueueType::ExternalThreadName );
}
sprintf( fn, "/proc/%" PRIu64 "/status", thread );
@ -909,7 +1258,7 @@ void SysTraceSendExternalName( uint64_t thread )
{
int pid = -1;
size_t lsz = 1024;
auto line = (char*)malloc( lsz );
auto line = (char*)tracy_malloc( lsz );
for(;;)
{
auto rd = getline( &line, &lsz, f );
@ -920,7 +1269,7 @@ void SysTraceSendExternalName( uint64_t thread )
break;
}
}
free( line );
tracy_free( line );
fclose( f );
if( pid >= 0 )
{
@ -944,7 +1293,7 @@ void SysTraceSendExternalName( uint64_t thread )
}
}
}
GetProfiler().SendString( thread, "???", QueueType::ExternalName );
GetProfiler().SendString( thread, "???", 3, QueueType::ExternalName );
}
}

View File

@ -7,9 +7,24 @@
# include <pthread.h>
#endif
#ifdef TRACY_MANUAL_LIFETIME
# include "tracy_rpmalloc.hpp"
#endif
namespace tracy
{
/// Scope guard whose only job is the work done in its destructor.
///
/// SysTraceWorker declares one as its first statement, so the destructor
/// runs when that thread function returns. With TRACY_MANUAL_LIFETIME
/// defined the destructor calls rpmalloc_thread_finalize(); otherwise it is
/// an empty (but still user-provided) destructor.
class ThreadExitHandler
{
public:
#ifdef TRACY_MANUAL_LIFETIME
    ~ThreadExitHandler() { rpmalloc_thread_finalize(); }
#else
    ~ThreadExitHandler() {}
#endif
};
#if defined _WIN32 || defined __CYGWIN__
class Thread

View File

@ -62,24 +62,6 @@
namespace tracy
{
// Exceptions
#ifndef MOODYCAMEL_EXCEPTIONS_ENABLED
#if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__))
#define MOODYCAMEL_EXCEPTIONS_ENABLED
#endif
#endif
#ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
#define MOODYCAMEL_TRY try
#define MOODYCAMEL_CATCH(...) catch(__VA_ARGS__)
#define MOODYCAMEL_RETHROW throw
#define MOODYCAMEL_THROW(expr) throw (expr)
#else
#define MOODYCAMEL_TRY if (true)
#define MOODYCAMEL_CATCH(...) else if (false)
#define MOODYCAMEL_RETHROW
#define MOODYCAMEL_THROW(expr)
#endif
#ifndef MOODYCAMEL_NOEXCEPT
#if !defined(MOODYCAMEL_EXCEPTIONS_ENABLED)
#define MOODYCAMEL_NOEXCEPT

View File

@ -1372,7 +1372,7 @@ _memory_allocate_heap(void) {
heap = (heap_t*)_memory_map((1 + (sizeof(heap_t) >> _memory_page_size_shift)) * _memory_page_size, &align_offset);
if (!heap)
return heap;
memset(heap, 0, sizeof(heap_t));
memset((char*)heap, 0, sizeof(heap_t));
heap->align_offset = align_offset;
//Get a new heap ID

View File

@ -9,8 +9,8 @@ namespace tracy
constexpr unsigned Lz4CompressBound( unsigned isize ) { return isize + ( isize / 255 ) + 16; }
enum : uint32_t { ProtocolVersion = 35 };
enum : uint32_t { BroadcastVersion = 1 };
enum : uint32_t { ProtocolVersion = 42 };
enum : uint16_t { BroadcastVersion = 2 };
using lz4sz_t = uint32_t;
@ -87,6 +87,7 @@ struct WelcomeMessage
uint8_t onDemand;
uint8_t isApple;
uint8_t cpuArch;
uint8_t codeTransfer;
char cpuManufacturer[12];
uint32_t cpuId;
char programName[WelcomeMessageProgramNameSize];
@ -107,10 +108,10 @@ enum { OnDemandPayloadMessageSize = sizeof( OnDemandPayloadMessage ) };
struct BroadcastMessage
{
uint32_t broadcastVersion;
uint16_t broadcastVersion;
uint16_t listenPort;
uint32_t protocolVersion;
uint32_t listenPort;
uint32_t activeTime; // in seconds
int32_t activeTime; // in seconds
char programName[WelcomeMessageProgramNameSize];
};

View File

@ -16,19 +16,12 @@ enum class QueueType : uint8_t
MessageColorCallstack,
MessageAppInfo,
ZoneBeginAllocSrcLoc,
ZoneBeginAllocSrcLocLean,
ZoneBeginAllocSrcLocCallstack,
ZoneBeginAllocSrcLocCallstackLean,
CallstackMemory,
CallstackMemoryLean,
Callstack,
CallstackLean,
CallstackAlloc,
CallstackAllocLean,
CallstackSample,
CallstackSampleLean,
FrameImage,
FrameImageLean,
ZoneBegin,
ZoneBeginCallstack,
ZoneEnd,
@ -40,9 +33,13 @@ enum class QueueType : uint8_t
LockSharedRelease,
LockName,
MemAlloc,
MemAllocNamed,
MemFree,
MemFreeNamed,
MemAllocCallstack,
MemAllocCallstackNamed,
MemFreeCallstack,
MemFreeCallstackNamed,
GpuZoneBegin,
GpuZoneBeginCallstack,
GpuZoneEnd,
@ -56,6 +53,7 @@ enum class QueueType : uint8_t
Terminate,
KeepAlive,
ThreadContext,
GpuCalibration,
Crash,
CrashReport,
ZoneValidation,
@ -82,9 +80,11 @@ enum class QueueType : uint8_t
ParamSetup,
ParamPingback,
CpuTopology,
SingleStringData,
SecondStringData,
MemNamePayload,
StringData,
ThreadName,
CustomStringData,
PlotName,
SourceLocationPayload,
CallstackPayload,
@ -140,15 +140,15 @@ struct QueueFrameMark
uint64_t name; // ptr
};
struct QueueFrameImageLean
struct QueueFrameImage
{
uint64_t frame;
uint32_t frame;
uint16_t w;
uint16_t h;
uint8_t flip;
};
struct QueueFrameImage : public QueueFrameImageLean
struct QueueFrameImageFat : public QueueFrameImage
{
uint64_t image; // ptr
};
@ -164,9 +164,10 @@ struct QueueSourceLocation
uint8_t b;
};
struct QueueZoneText
struct QueueZoneTextFat
{
uint64_t text; // ptr
uint16_t size;
};
enum class LockType : uint8_t
@ -187,7 +188,6 @@ struct QueueLockTerminate
{
uint32_t id;
int64_t time;
LockType type;
};
struct QueueLockWait
@ -195,7 +195,6 @@ struct QueueLockWait
uint64_t thread;
uint32_t id;
int64_t time;
LockType type;
};
struct QueueLockObtain
@ -222,7 +221,12 @@ struct QueueLockMark
struct QueueLockName
{
uint32_t id;
};
// QueueLockName (the lock id) extended with a name pointer and a 16-bit size.
// QueueDataSize sizes the lock-name entry with sizeof( QueueLockName ), not
// with the size of this struct.
// NOTE(review): `size` is presumably the byte length of the string at `name` -
// confirm against the code that fills this item.
struct QueueLockNameFat : public QueueLockName
{
uint64_t name; // ptr
uint16_t size;
};
enum class PlotDataType : uint8_t
@ -248,7 +252,6 @@ struct QueuePlotData
struct QueueMessage
{
int64_t time;
uint64_t text; // ptr
};
struct QueueMessageColor : public QueueMessage
@ -258,6 +261,28 @@ struct QueueMessageColor : public QueueMessage
uint8_t b;
};
// QueueMessage (timestamp) plus a 64-bit text pointer and no size field.
// QueueDataSize uses sizeof( QueueMessageLiteral ) for the "literal" message
// entries, so the pointer value is part of the sized item.
struct QueueMessageLiteral : public QueueMessage
{
uint64_t text; // ptr
};
// QueueMessageColor (timestamp and colour bytes) plus a 64-bit text pointer and
// no size field. QueueDataSize uses sizeof( QueueMessageColorLiteral ) for the
// coloured "literal" message entries.
struct QueueMessageColorLiteral : public QueueMessageColor
{
uint64_t text; // ptr
};
// QueueMessage (timestamp) plus a text pointer and a 16-bit size.
// The plain message entries in QueueDataSize are sized with
// sizeof( QueueMessage ), i.e. without these two extra fields.
// NOTE(review): `size` is presumably the byte length of the buffer at `text` -
// confirm against the producer.
struct QueueMessageFat : public QueueMessage
{
uint64_t text; // ptr
uint16_t size;
};
// QueueMessageColor (timestamp and colour bytes) plus a text pointer and a
// 16-bit size. The coloured message entries in QueueDataSize are sized with
// sizeof( QueueMessageColor ), i.e. without these two extra fields.
// NOTE(review): `size` is presumably the byte length of the buffer at `text` -
// confirm against the producer.
struct QueueMessageColorFat : public QueueMessageColor
{
uint64_t text; // ptr
uint16_t size;
};
// Don't change order, only add new entries at the end, this is also used on trace dumps!
enum class GpuContextType : uint8_t
{
@ -268,6 +293,11 @@ enum class GpuContextType : uint8_t
Direct3D12
};
// Bit flags with a uint8_t underlying type; QueueGpuNewContext::flags has this type.
enum GpuContextFlags : uint8_t
{
// Bit 0. NOTE(review): presumably marks a context that emits GpuCalibration
// items - confirm against the GPU back ends.
GpuContextCalibration = 1 << 0
};
struct QueueGpuNewContext
{
int64_t cpuTime;
@ -275,7 +305,7 @@ struct QueueGpuNewContext
uint64_t thread;
float period;
uint8_t context;
uint8_t accuracyBits;
GpuContextFlags flags;
GpuContextType type;
};
@ -303,6 +333,19 @@ struct QueueGpuTime
uint8_t context;
};
// Payload of the GpuCalibration queue item (QueueItem::gpuCalibration);
// QueueDataSize sizes that item as the header plus this struct.
// NOTE(review): the exact meaning and units of gpuTime / cpuTime / cpuDelta are
// not established in this part of the file - confirm before relying on them.
struct QueueGpuCalibration
{
int64_t gpuTime;
int64_t cpuTime;
int64_t cpuDelta;
uint8_t context; // same 8-bit context field as in the other QueueGpu* structs
};
// Payload of the MemNamePayload queue item (QueueItem::memName);
// QueueDataSize sizes that item as the header plus this struct.
struct QueueMemNamePayload
{
// NOTE(review): presumably a pointer to the name string, like the other
// 64-bit `name` fields in this header - confirm.
uint64_t name;
};
struct QueueMemAlloc
{
int64_t time;
@ -318,29 +361,24 @@ struct QueueMemFree
uint64_t ptr;
};
struct QueueCallstackMemory
struct QueueCallstackFat
{
uint64_t ptr;
};
struct QueueCallstack
{
uint64_t ptr;
};
struct QueueCallstackAlloc
struct QueueCallstackAllocFat
{
uint64_t ptr;
uint64_t nativePtr;
};
struct QueueCallstackSampleLean
struct QueueCallstackSample
{
int64_t time;
uint64_t thread;
};
struct QueueCallstackSample : public QueueCallstackSampleLean
struct QueueCallstackSampleFat : public QueueCallstackSample
{
uint64_t ptr;
};
@ -349,21 +387,17 @@ struct QueueCallstackFrameSize
{
uint64_t ptr;
uint8_t size;
uint64_t imageName;
};
struct QueueCallstackFrame
{
uint64_t name;
uint64_t file;
uint32_t line;
uint64_t symAddr;
char symLen[3];
uint32_t symLen;
};
struct QueueSymbolInformation
{
uint64_t file;
uint32_t line;
uint64_t symAddr;
};
@ -371,7 +405,6 @@ struct QueueSymbolInformation
struct QueueCodeInformation
{
uint64_t ptr;
uint64_t file;
uint32_t line;
};
@ -460,9 +493,9 @@ struct QueueItem
QueueStringTransfer stringTransfer;
QueueFrameMark frameMark;
QueueFrameImage frameImage;
QueueFrameImage frameImageLean;
QueueFrameImageFat frameImageFat;
QueueSourceLocation srcloc;
QueueZoneText zoneText;
QueueZoneTextFat zoneTextFat;
QueueLockAnnounce lockAnnounce;
QueueLockTerminate lockTerminate;
QueueLockWait lockWait;
@ -470,20 +503,26 @@ struct QueueItem
QueueLockRelease lockRelease;
QueueLockMark lockMark;
QueueLockName lockName;
QueueLockNameFat lockNameFat;
QueuePlotData plotData;
QueueMessage message;
QueueMessageColor messageColor;
QueueMessageLiteral messageLiteral;
QueueMessageColorLiteral messageColorLiteral;
QueueMessageFat messageFat;
QueueMessageColorFat messageColorFat;
QueueGpuNewContext gpuNewContext;
QueueGpuZoneBegin gpuZoneBegin;
QueueGpuZoneEnd gpuZoneEnd;
QueueGpuTime gpuTime;
QueueGpuCalibration gpuCalibration;
QueueMemAlloc memAlloc;
QueueMemFree memFree;
QueueCallstackMemory callstackMemory;
QueueCallstack callstack;
QueueCallstackAlloc callstackAlloc;
QueueMemNamePayload memName;
QueueCallstackFat callstackFat;
QueueCallstackAllocFat callstackAllocFat;
QueueCallstackSample callstackSample;
QueueCallstackSampleLean callstackSampleLean;
QueueCallstackSampleFat callstackSampleFat;
QueueCallstackFrameSize callstackFrameSize;
QueueCallstackFrame callstackFrame;
QueueSymbolInformation symbolInformation;
@ -504,27 +543,20 @@ struct QueueItem
enum { QueueItemSize = sizeof( QueueItem ) };
static constexpr size_t QueueDataSize[] = {
sizeof( QueueHeader ) + sizeof( QueueZoneText ),
sizeof( QueueHeader ) + sizeof( QueueZoneText ), // zone name
sizeof( QueueHeader ), // zone text
sizeof( QueueHeader ), // zone name
sizeof( QueueHeader ) + sizeof( QueueMessage ),
sizeof( QueueHeader ) + sizeof( QueueMessageColor ),
sizeof( QueueHeader ) + sizeof( QueueMessage ), // callstack
sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // callstack
sizeof( QueueHeader ) + sizeof( QueueMessage ), // app info
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // allocated source location, not for network transfer
sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // lean allocated source location
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // allocated source location, callstack, not for network transfer
sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // lean allocated source location, callstack
sizeof( QueueHeader ) + sizeof( QueueCallstackMemory ), // not for network transfer
sizeof( QueueHeader ), // lean callstack memory
sizeof( QueueHeader ) + sizeof( QueueCallstack ), // not for network transfer
sizeof( QueueHeader ), // lean callstack
sizeof( QueueHeader ) + sizeof( QueueCallstackAlloc ), // not for network transfer
sizeof( QueueHeader ), // lean callstack alloc
sizeof( QueueHeader ) + sizeof( QueueCallstackSample ), // not for network transfer
sizeof( QueueHeader ) + sizeof( QueueCallstackSampleLean ),
sizeof( QueueHeader ) + sizeof( QueueFrameImage ), // not for network transfer
sizeof( QueueHeader ) + sizeof( QueueFrameImageLean ),
sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // allocated source location
sizeof( QueueHeader ) + sizeof( QueueZoneBeginLean ), // allocated source location, callstack
sizeof( QueueHeader ), // callstack memory
sizeof( QueueHeader ), // callstack
sizeof( QueueHeader ), // callstack alloc
sizeof( QueueHeader ) + sizeof( QueueCallstackSample ),
sizeof( QueueHeader ) + sizeof( QueueFrameImage ),
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ),
sizeof( QueueHeader ) + sizeof( QueueZoneBegin ), // callstack
sizeof( QueueHeader ) + sizeof( QueueZoneEnd ),
@ -536,9 +568,13 @@ static constexpr size_t QueueDataSize[] = {
sizeof( QueueHeader ) + sizeof( QueueLockRelease ), // shared
sizeof( QueueHeader ) + sizeof( QueueLockName ),
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ),
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // named
sizeof( QueueHeader ) + sizeof( QueueMemFree ),
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // named
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack
sizeof( QueueHeader ) + sizeof( QueueMemAlloc ), // callstack, named
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack
sizeof( QueueHeader ) + sizeof( QueueMemFree ), // callstack, named
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ),
sizeof( QueueHeader ) + sizeof( QueueGpuZoneBegin ), // callstack
sizeof( QueueHeader ) + sizeof( QueueGpuZoneEnd ),
@ -553,6 +589,7 @@ static constexpr size_t QueueDataSize[] = {
sizeof( QueueHeader ), // terminate
sizeof( QueueHeader ), // keep alive
sizeof( QueueHeader ) + sizeof( QueueThreadContext ),
sizeof( QueueHeader ) + sizeof( QueueGpuCalibration ),
sizeof( QueueHeader ), // crash
sizeof( QueueHeader ) + sizeof( QueueCrashReport ),
sizeof( QueueHeader ) + sizeof( QueueZoneValidation ),
@ -564,10 +601,10 @@ static constexpr size_t QueueDataSize[] = {
sizeof( QueueHeader ) + sizeof( QueueLockAnnounce ),
sizeof( QueueHeader ) + sizeof( QueueLockTerminate ),
sizeof( QueueHeader ) + sizeof( QueueLockMark ),
sizeof( QueueHeader ) + sizeof( QueueMessage ), // literal
sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // literal
sizeof( QueueHeader ) + sizeof( QueueMessage ), // literal, callstack
sizeof( QueueHeader ) + sizeof( QueueMessageColor ), // literal, callstack
sizeof( QueueHeader ) + sizeof( QueueMessageLiteral ),
sizeof( QueueHeader ) + sizeof( QueueMessageColorLiteral ),
sizeof( QueueHeader ) + sizeof( QueueMessageLiteral ), // callstack
sizeof( QueueHeader ) + sizeof( QueueMessageColorLiteral ), // callstack
sizeof( QueueHeader ) + sizeof( QueueGpuNewContext ),
sizeof( QueueHeader ) + sizeof( QueueCallstackFrameSize ),
sizeof( QueueHeader ) + sizeof( QueueCallstackFrame ),
@ -579,10 +616,12 @@ static constexpr size_t QueueDataSize[] = {
sizeof( QueueHeader ) + sizeof( QueueParamSetup ),
sizeof( QueueHeader ), // param pingback
sizeof( QueueHeader ) + sizeof( QueueCpuTopology ),
sizeof( QueueHeader ), // single string data
sizeof( QueueHeader ), // second string data
sizeof( QueueHeader ) + sizeof( QueueMemNamePayload ),
// keep all QueueStringTransfer below
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // string data
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // thread name
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // custom string data
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // plot name
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // allocated source location payload
sizeof( QueueHeader ) + sizeof( QueueStringTransfer ), // callstack payload

View File

@ -1,4 +1,5 @@
#include <assert.h>
#include <inttypes.h>
#include <new>
#include <stdio.h>
#include <stdlib.h>
@ -106,36 +107,39 @@ Socket::~Socket()
}
}
bool Socket::Connect( const char* addr, int port )
bool Socket::Connect( const char* addr, uint16_t port )
{
assert( !IsValid() );
if( m_ptr )
{
const auto c = connect( m_connSock, m_ptr->ai_addr, m_ptr->ai_addrlen );
assert( c == -1 );
#if defined _WIN32 || defined __CYGWIN__
const auto err = WSAGetLastError();
if( err == WSAEALREADY || err == WSAEINPROGRESS ) return false;
if( err != WSAEISCONN )
if( c == -1 )
{
freeaddrinfo( m_res );
closesocket( m_connSock );
m_ptr = nullptr;
return false;
}
#if defined _WIN32
const auto err = WSAGetLastError();
if( err == WSAEALREADY || err == WSAEINPROGRESS ) return false;
if( err != WSAEISCONN )
{
freeaddrinfo( m_res );
closesocket( m_connSock );
m_ptr = nullptr;
return false;
}
#else
if( errno == EALREADY || errno == EINPROGRESS ) return false;
if( errno != EISCONN )
{
freeaddrinfo( m_res );
close( m_connSock );
m_ptr = nullptr;
return false;
}
const auto err = errno;
if( err == EALREADY || err == EINPROGRESS ) return false;
if( err != EISCONN )
{
freeaddrinfo( m_res );
close( m_connSock );
m_ptr = nullptr;
return false;
}
#endif
}
#if defined _WIN32 || defined __CYGWIN__
#if defined _WIN32
u_long nonblocking = 0;
ioctlsocket( m_connSock, FIONBIO, &nonblocking );
#else
@ -156,7 +160,7 @@ bool Socket::Connect( const char* addr, int port )
hints.ai_socktype = SOCK_STREAM;
char portbuf[32];
sprintf( portbuf, "%i", port );
sprintf( portbuf, "%" PRIu16, port );
if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false;
int sock = 0;
@ -167,7 +171,7 @@ bool Socket::Connect( const char* addr, int port )
int val = 1;
setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
#endif
#if defined _WIN32 || defined __CYGWIN__
#if defined _WIN32
u_long nonblocking = 1;
ioctlsocket( sock, FIONBIO, &nonblocking );
#else
@ -180,7 +184,7 @@ bool Socket::Connect( const char* addr, int port )
}
else
{
#if defined _WIN32 || defined __CYGWIN__
#if defined _WIN32
const auto err = WSAGetLastError();
if( err != WSAEWOULDBLOCK )
{
@ -203,7 +207,7 @@ bool Socket::Connect( const char* addr, int port )
freeaddrinfo( res );
if( !ptr ) return false;
#if defined _WIN32 || defined __CYGWIN__
#if defined _WIN32
u_long nonblocking = 0;
ioctlsocket( sock, FIONBIO, &nonblocking );
#else
@ -215,6 +219,48 @@ bool Socket::Connect( const char* addr, int port )
return true;
}
bool Socket::ConnectBlocking( const char* addr, uint16_t port )
{
assert( !IsValid() );
assert( !m_ptr );
struct addrinfo hints;
struct addrinfo *res, *ptr;
memset( &hints, 0, sizeof( hints ) );
hints.ai_family = AF_UNSPEC;
hints.ai_socktype = SOCK_STREAM;
char portbuf[32];
sprintf( portbuf, "%" PRIu16, port );
if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false;
int sock = 0;
for( ptr = res; ptr; ptr = ptr->ai_next )
{
if( ( sock = socket( ptr->ai_family, ptr->ai_socktype, ptr->ai_protocol ) ) == -1 ) continue;
#if defined __APPLE__
int val = 1;
setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
#endif
if( connect( sock, ptr->ai_addr, ptr->ai_addrlen ) == -1 )
{
#ifdef _WIN32
closesocket( sock );
#else
close( sock );
#endif
continue;
}
break;
}
freeaddrinfo( res );
if( !ptr ) return false;
m_sock.store( sock, std::memory_order_relaxed );
return true;
}
void Socket::Close()
{
const auto sock = m_sock.load( std::memory_order_relaxed );
@ -247,7 +293,7 @@ int Socket::GetSendBufSize()
{
const auto sock = m_sock.load( std::memory_order_relaxed );
int bufSize;
#if defined _WIN32 || defined __CYGWIN__
#if defined _WIN32
int sz = sizeof( bufSize );
getsockopt( sock, SOL_SOCKET, SO_SNDBUF, (char*)&bufSize, &sz );
#else
@ -306,6 +352,24 @@ int Socket::Recv( void* _buf, int len, int timeout )
}
}
/// Read from the socket until `len` bytes have arrived or recv() returns 0.
///
/// @param _buf     Destination buffer with room for at least `len` bytes.
/// @param len      Maximum number of bytes to read.
/// @param timeout  NOTE(review): never read in this body - every recv() call
///                 is made without polling first. Confirm whether callers
///                 expect the timeout to be honoured.
/// @return Number of bytes stored in `_buf` (may be less than `len` if recv()
///         returned 0 first), or -1 as soon as any recv() call returns -1;
///         bytes read before the failure are then not reported.
int Socket::ReadUpTo( void* _buf, int len, int timeout )
{
    const auto sock = m_sock.load( std::memory_order_relaxed );

    char* cursor = static_cast<char*>( _buf );
    int remaining = len;
    int total = 0;
    while( remaining > 0 )
    {
        const auto got = recv( sock, cursor, remaining, 0 );
        if( got == -1 ) return -1;
        if( got == 0 ) break;
        cursor += got;
        total += got;
        remaining -= got;
    }
    return total;
}
bool Socket::Read( void* buf, int len, int timeout )
{
auto cbuf = (char*)buf;
@ -383,33 +447,45 @@ ListenSocket::~ListenSocket()
if( m_sock != -1 ) Close();
}
bool ListenSocket::Listen( int port, int backlog )
/// Resolve a local stream address for `port` in the given address family and
/// create a socket for the first result.
///
/// Unless the TRACY_ONLY_LOCALHOST macro is defined, AI_PASSIVE is requested
/// whenever the TRACY_ONLY_LOCALHOST environment variable is unset or does not
/// start with '1'.
///
/// @param port       Port number, formatted as the getaddrinfo() service string.
/// @param ai_family  Address family to request (the caller passes AF_INET6 or AF_INET).
/// @param res        Receives the getaddrinfo() result list. On success the
///                   caller owns it; if socket creation fails it has already
///                   been freed here and must not be used.
/// @return The new socket descriptor, or -1 if resolution or socket creation failed.
static int addrinfo_and_socket_for_family( uint16_t port, int ai_family, struct addrinfo** res )
{
    struct addrinfo query = {};
    query.ai_socktype = SOCK_STREAM;
    query.ai_family = ai_family;
#ifndef TRACY_ONLY_LOCALHOST
    const char* const envLocal = getenv( "TRACY_ONLY_LOCALHOST" );
    const bool localhostOnly = envLocal && envLocal[0] == '1';
    if( !localhostOnly ) query.ai_flags = AI_PASSIVE;
#endif

    char service[32];
    sprintf( service, "%" PRIu16, port );
    if( getaddrinfo( nullptr, service, &query, res ) != 0 ) return -1;

    const struct addrinfo* first = *res;
    const int fd = socket( first->ai_family, first->ai_socktype, first->ai_protocol );
    if( fd != -1 ) return fd;

    // No socket could be created for this family: release the list before reporting failure.
    freeaddrinfo( *res );
    return -1;
}
bool ListenSocket::Listen( uint16_t port, int backlog )
{
assert( m_sock == -1 );
struct addrinfo* res;
struct addrinfo hints;
struct addrinfo* res = nullptr;
memset( &hints, 0, sizeof( hints ) );
hints.ai_family = AF_INET6;
hints.ai_socktype = SOCK_STREAM;
#ifndef TRACY_ONLY_LOCALHOST
hints.ai_flags = AI_PASSIVE;
#ifndef TRACY_ONLY_IPV4
const char* onlyIPv4 = getenv( "TRACY_ONLY_IPV4" );
if( !onlyIPv4 || onlyIPv4[0] != '1' )
{
m_sock = addrinfo_and_socket_for_family( port, AF_INET6, &res );
}
#endif
char portbuf[32];
sprintf( portbuf, "%i", port );
if( getaddrinfo( nullptr, portbuf, &hints, &res ) != 0 ) return false;
m_sock = socket( res->ai_family, res->ai_socktype, res->ai_protocol );
if (m_sock == -1)
{
// IPV6 protocol may not be available/is disabled. Try to create a socket
// with the IPV4 protocol
hints.ai_family = AF_INET;
if( getaddrinfo( nullptr, portbuf, &hints, &res ) != 0 ) return false;
m_sock = socket( res->ai_family, res->ai_socktype, res->ai_protocol );
m_sock = addrinfo_and_socket_for_family( port, AF_INET, &res );
if( m_sock == -1 ) return false;
}
#if defined _WIN32 || defined __CYGWIN__
@ -483,7 +559,7 @@ UdpBroadcast::~UdpBroadcast()
if( m_sock != -1 ) Close();
}
bool UdpBroadcast::Open( const char* addr, int port )
bool UdpBroadcast::Open( const char* addr, uint16_t port )
{
assert( m_sock == -1 );
@ -495,7 +571,7 @@ bool UdpBroadcast::Open( const char* addr, int port )
hints.ai_socktype = SOCK_DGRAM;
char portbuf[32];
sprintf( portbuf, "%i", port );
sprintf( portbuf, "%" PRIu16, port );
if( getaddrinfo( addr, portbuf, &hints, &res ) != 0 ) return false;
int sock = 0;
@ -506,7 +582,7 @@ bool UdpBroadcast::Open( const char* addr, int port )
int val = 1;
setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
#endif
#if defined _WIN32 || defined __CYGWIN__
#if defined _WIN32
unsigned long broadcast = 1;
if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, (const char*)&broadcast, sizeof( broadcast ) ) == -1 )
#else
@ -541,7 +617,7 @@ void UdpBroadcast::Close()
m_sock = -1;
}
int UdpBroadcast::Send( int port, const void* data, int len )
int UdpBroadcast::Send( uint16_t port, const void* data, int len )
{
assert( m_sock != -1 );
struct sockaddr_in addr;
@ -563,8 +639,10 @@ IpAddress::~IpAddress()
void IpAddress::Set( const struct sockaddr& addr )
{
#if __MINGW32__
auto ai = (struct sockaddr_in*)&addr;
#if defined _WIN32 && ( !defined NTDDI_WIN10 || NTDDI_VERSION < NTDDI_WIN10 )
struct sockaddr_in tmp;
memcpy( &tmp, &addr, sizeof( tmp ) );
auto ai = &tmp;
#else
auto ai = (const struct sockaddr_in*)&addr;
#endif
@ -585,7 +663,7 @@ UdpListen::~UdpListen()
if( m_sock != -1 ) Close();
}
bool UdpListen::Listen( int port )
bool UdpListen::Listen( uint16_t port )
{
assert( m_sock == -1 );
@ -596,14 +674,14 @@ bool UdpListen::Listen( int port )
int val = 1;
setsockopt( sock, SOL_SOCKET, SO_NOSIGPIPE, &val, sizeof( val ) );
#endif
#if defined _WIN32 || defined __CYGWIN__
#if defined _WIN32
unsigned long reuse = 1;
setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, (const char*)&reuse, sizeof( reuse ) );
#else
int reuse = 1;
setsockopt( m_sock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof( reuse ) );
#endif
#if defined _WIN32 || defined __CYGWIN__
#if defined _WIN32
unsigned long broadcast = 1;
if( setsockopt( sock, SOL_SOCKET, SO_BROADCAST, (const char*)&broadcast, sizeof( broadcast ) ) == -1 )
#else
@ -649,14 +727,14 @@ void UdpListen::Close()
m_sock = -1;
}
const char* UdpListen::Read( size_t& len, IpAddress& addr )
const char* UdpListen::Read( size_t& len, IpAddress& addr, int timeout )
{
static char buf[2048];
struct pollfd fd;
fd.fd = (socket_t)m_sock;
fd.events = POLLIN;
if( poll( &fd, 1, 10 ) <= 0 ) return nullptr;
if( poll( &fd, 1, timeout ) <= 0 ) return nullptr;
sockaddr sa;
socklen_t salen = sizeof( struct sockaddr );

View File

@ -23,12 +23,14 @@ public:
Socket( int sock );
~Socket();
bool Connect( const char* addr, int port );
bool Connect( const char* addr, uint16_t port );
bool ConnectBlocking( const char* addr, uint16_t port );
void Close();
int Send( const void* buf, int len );
int GetSendBufSize();
int ReadUpTo( void* buf, int len, int timeout );
bool Read( void* buf, int len, int timeout );
template<typename ShouldExit>
@ -74,7 +76,7 @@ public:
ListenSocket();
~ListenSocket();
bool Listen( int port, int backlog );
bool Listen( uint16_t port, int backlog );
Socket* Accept();
void Close();
@ -93,10 +95,10 @@ public:
UdpBroadcast();
~UdpBroadcast();
bool Open( const char* addr, int port );
bool Open( const char* addr, uint16_t port );
void Close();
int Send( int port, const void* data, int len );
int Send( uint16_t port, const void* data, int len );
UdpBroadcast( const UdpBroadcast& ) = delete;
UdpBroadcast( UdpBroadcast&& ) = delete;
@ -134,10 +136,10 @@ public:
UdpListen();
~UdpListen();
bool Listen( int port );
bool Listen( uint16_t port );
void Close();
const char* Read( size_t& len, IpAddress& addr );
const char* Read( size_t& len, IpAddress& addr, int timeout );
UdpListen( const UdpListen& ) = delete;
UdpListen( UdpListen&& ) = delete;

View File

@ -237,3 +237,13 @@ TRACY_API const char* GetThreadName( uint64_t id )
}
}
// When compiled as C++, give the function below C linkage.
#ifdef __cplusplus
extern "C" {
#endif
// Exported entry point that forwards `name` unchanged to tracy::SetThreadName().
TRACY_API void ___tracy_set_thread_name( const char* name ) { tracy::SetThreadName( name ); }
#ifdef __cplusplus
}
#endif

View File

@ -288,11 +288,10 @@ namespace contouring {
for (size_t i = 0; i < surrounding.size(); i++) {
auto &edits = surrounding[i]->getEdits();
auto offset = glm::ivec3(surrounding::g_corner_offsets[i]) * CHUNK_LENGTH;
for (auto it = edits.end(); it != edits.begin();) {
it--;
auto p = offset + glm::ivec3(glm::fromIdx(it->idx));
for (auto it = edits.begin(); it != edits.end(); ++it) {
auto p = offset + glm::ivec3(glm::fromIdx(it->first));
if(p.x < SIZE && p.y < SIZE && p.z < SIZE) {
setCell(p.x, p.y, p.z, it->value);
setCell(p.x, p.y, p.z, it->second.value);
}
}
}

View File

@ -11,7 +11,7 @@
namespace tracy {
class VkCtx;
}
typedef tracy::VkCtx* TracyVkCtx;
typedef tracy::VkCtx* TracyVkCtxPtr;
namespace render::vk {
class SwapChain;
@ -54,7 +54,7 @@ private:
VkQueue graphicsQueue;
VkCommandPool graphicsPool;
std::vector<VkCommandBuffer> graphicsBuffers;
TracyVkCtx tracyCtx;
TracyVkCtxPtr tracyCtx;
BufferGroup uniformBuffers;

View File

@ -16,7 +16,7 @@ public:
it->second -= deltaTime;
if (it->second <= 0 && animate) {
invalidate(it->first.idx);
edits.emplace_back(it->first);
edits.emplace(it->first.idx, it->first);
it = futureEdits.erase(it);
} else {
it++;

View File

@ -9,6 +9,7 @@
#include "../../core/net/io.hpp"
#include "../../core/utils/logger.hpp"
#include "Chunk.hpp"
#include <random>
using namespace world::client;
@ -29,6 +30,8 @@ void DistantUniverse::update(voxel_pos pos, float deltaTime) {
{ // Update alive areas
ZoneScopedN("World");
auto rng = std::mt19937(std::rand());
const auto contouringThreshold = rng.max() / (1 + contouring->getQueueSize());
for (auto& area: areas) {
ZoneScopedN("Area");
const bool chunkChangeArea = (false && area.second->move(glm::vec3(deltaTime))) || chunkChange; // TODO: area.velocity
@ -41,7 +44,7 @@ void DistantUniverse::update(voxel_pos pos, float deltaTime) {
if (glm::length2(diff - it_c->first) > glm::pow2(options.keepDistance)) {
it_c = chunks.erase(it_c);
} else {
if(const auto neighbors = std::dynamic_pointer_cast<Chunk>(it_c->second)->update(deltaTime, true /*MAYBE: random update*/)) {
if(const auto neighbors = std::dynamic_pointer_cast<Chunk>(it_c->second)->update(deltaTime, rng() < contouringThreshold)) {
contouring->onUpdate(std::make_pair(area.first, it_c->first), diff, chunks, neighbors.value());
}
++it_c;
@ -290,24 +293,9 @@ bool DistantUniverse::onPacket(const data::out_view& buf, net::PacketFlags) {
if (!fill)
break;
if(const auto it = areas.find(fill->pos.first); it != areas.end()) {
auto &chunks = it->second->setChunks();
auto iterator = world::iterator::Get(fill->shape, fill->radius);
world::iterator::pair point;
while (iterator->next(point)) {
const voxel_pos offset = point.first;
const auto split = glm::splitIdx(fill->pos.second + offset);
if(chunks.inRange(split.first)) {
if(const auto chunk = it->second->setChunks().findInRange(split.first)) {
auto ck = std::dynamic_pointer_cast<Chunk>(chunk.value());
auto prev = ck->get(split.second);
const auto next = prev.filled(fill->val, point.second);
const auto delay = glm::length2(offset) / fill->radius * .05f;
ck->apply(Chunk::Edit{split.second, next, delay});
}
}
}
}
world::iterator::Apply<Chunk>(areas, *fill, [](std::shared_ptr<Chunk> &ck, chunk_pos, chunk_voxel_idx idx, Voxel, Voxel next, float delay) {
ck->apply(Chunk::Edit{next, delay, idx});
});
break;
}
@ -395,27 +383,10 @@ void DistantUniverse::emit(const action::packet &action) {
peer.send(net::PacketWriter::Of(net::client_packet_type::FILL_SHAPE, *fill));
if (options.editPrediction) {
ZoneScopedN("Fill");
const auto keepDelay = 10 + (peer.getRTT() / 20000.f); // 10s + 50RTT
if(const auto it = areas.find(fill->pos.first); it != areas.end()) {
auto &chunks = it->second->setChunks();
auto iterator = world::iterator::Get(fill->shape, fill->radius);
world::iterator::pair point;
while (iterator->next(point)) {
const voxel_pos offset = point.first;
const auto split = glm::splitIdx(fill->pos.second + offset);
if(chunks.inRange(split.first)) {
if(const auto chunk = it->second->setChunks().findInRange(split.first)) {
auto ck = std::dynamic_pointer_cast<Chunk>(chunk.value());
auto prev = ck->get(split.second);
const auto next = prev.filled(fill->val, point.second);
if(prev.value != next.value) {
const auto delay = glm::length2(offset) / fill->radius * .05f;
ck->addFutureEdit(Chunk::Edit{split.second, next, keepDelay - delay * 2}, delay);
}
}
}
}
}
const auto keepDelay = 5 + (peer.getRTT() / 20000.f); // 5s + 50RTT
world::iterator::Apply<Chunk>(areas, *fill, [&](std::shared_ptr<Chunk> &ck, chunk_pos, chunk_voxel_idx idx, Voxel, Voxel next, float delay) {
ck->addFutureEdit(Chunk::Edit{next, keepDelay - delay * 2, idx}, delay);
});
}
} else {
LOG_W("Bad action " << action.index());

View File

@ -43,7 +43,9 @@ public:
void* data() { return buffer.writeTo(0); }
void reserve(size_t target) {
if (target >= buffer.siz - buffer.cur) {
buffer.ptr = (uint8_t*)realloc(buffer.ptr, target + buffer.cur);
const auto size = target + buffer.cur;
buffer.ptr = (uint8_t *)realloc(buffer.ptr, size);
buffer.siz = size;
}
}
void resize(size_t target) {

View File

@ -13,11 +13,13 @@ namespace world {
Chunk(std::istream& str, bool rle = RLE);
virtual ~Chunk();
struct Edit {
chunk_voxel_idx idx;
struct EditBody {
Voxel value;
float delay;
};
struct Edit: EditBody {
chunk_voxel_idx idx;
};
/// Get voxel from index
inline const Voxel& get(chunk_voxel_idx idx) const {

View File

@ -12,9 +12,9 @@ EdittableChunk::~EdittableChunk() { }
std::optional<Faces> EdittableChunk::update(float deltaTime, bool animate) {
ZoneScopedN("Chunk");
for(auto it = edits.begin(); it != edits.end();) {
it->delay -= deltaTime;
if(it->delay <= 0 && animate) {
invalidate(it->idx);
it->second.delay -= deltaTime;
if(it->second.delay <= 0 && animate) {
invalidate(it->first);
it = edits.erase(it);
} else {
it++;
@ -42,8 +42,9 @@ void EdittableChunk::apply(const Edit& edit) {
const auto prev = voxels[edit.idx];
if(prev.value != edit.value.value) {
voxels[edit.idx] = edit.value;
edits.erase(edit.idx);
if(edit.delay > 0) {
edits.emplace_back<Edit>({edit.idx, prev, edit.delay});
edits.emplace(edit.idx, EditBody{prev, edit.delay});
} else {
invalidate(edit.idx);
}

View File

@ -22,8 +22,9 @@ namespace world::client {
void apply(const Chunk::Edit &edit);
using edits_t = robin_hood::unordered_map<chunk_voxel_idx, EditBody>;
/// Get pending changes
const std::vector<Chunk::Edit> &getEdits() const { return edits; }
const edits_t &getEdits() const { return edits; }
static std::optional<chunk_voxel_idx> getNeighborIdx(chunk_voxel_idx idx, Face dir);
@ -31,8 +32,7 @@ namespace world::client {
EdittableChunk();
/// Animated changes
/// MAYBE: sort by delay
std::vector<Chunk::Edit> edits;
edits_t edits;
/// Require update
bool upToDate = true;
/// Neighbors to update

View File

@ -18,6 +18,34 @@ protected:
/// From -radius to radius
std::unique_ptr<Abstract> Get(action::Shape, uint16_t radius);
/// Visit every voxel touched by a fill action and report the ones that change.
///
/// Looks up the area `fill.pos.first` in `areas`; when it is absent nothing happens.
/// Otherwise walks the points produced by `Get(fill.shape, fill.radius)`, resolves
/// each point to a (chunk position, voxel index) pair with `glm::splitIdx`, and, for
/// voxels whose `filled()` result differs from the current value, invokes
/// `callback(chunk, chunk_pos, voxel_idx, prev, next, delay)`.
///
/// @tparam Chunk    concrete chunk type the stored chunk pointers are cast to
/// @tparam area_map container offering `find()`/`end()` keyed by `fill.pos.first`
/// @tparam CB       callable accepting the six arguments listed above
/// @param areas     areas to search for the target of the fill
/// @param fill      action describing position, shape, radius and value
/// @param callback  invoked once per voxel whose value would change
///
/// NOTE(review): the delay is `length2(offset) / fill.radius * .05f`; a radius of 0
/// would divide by zero if the iterator still yields a point — confirm with Get().
template<typename Chunk, typename area_map, typename CB>
void Apply(area_map &areas, action::FillShape fill, const CB& callback) {
    const auto area_it = areas.find(fill.pos.first);
    if (area_it == areas.end())
        return;

    auto &chunks = area_it->second->setChunks();
    auto iterator = Get(fill.shape, fill.radius);
    pair point;
    // Last successfully resolved chunk; consecutive points usually share it,
    // so the lookup and cast are only redone when the chunk position changes.
    std::shared_ptr<Chunk> ck = nullptr;
    chunk_pos ck_pos = chunk_pos(INT32_MAX);
    while (iterator->next(point)) {
        const voxel_pos offset = point.first;
        const auto split = glm::splitIdx(fill.pos.second + offset);
        if (split.first != ck_pos && chunks.inRange(split.first)) {
            if (auto found = chunks.find(split.first); found != chunks.end()) {
                // Only publish the chunk/position pair when the cast succeeds:
                // recording the position with a null chunk would make the branch
                // below dereference a null pointer.
                if (auto casted = std::dynamic_pointer_cast<Chunk>(found->second)) {
                    ck = casted;
                    ck_pos = split.first;
                }
            }
        }
        if (split.first == ck_pos) {
            auto prev = ck->get(split.second);
            const auto next = prev.filled(fill.val, point.second);
            if (prev.value != next.value) {
                callback(ck, ck_pos, split.second, prev, next, glm::length2(offset) / fill.radius * .05f);
            }
        }
    }
}
class Cube final: public Abstract {
public:
bool next(pair&) override;

View File

@ -58,6 +58,14 @@ public:
call(&peer);
}
}
template<typename P>
bool anyPeer(P predicate) {
for(auto& peer: peers) {
if(predicate(&peer))
return true;
}
return false;
}
private:
std::forward_list<Peer> peers;

View File

@ -16,8 +16,9 @@ public:
std::optional<Item> replace(chunk_voxel_idx idx, const Voxel &val, float delay = 0) override {
const auto res = voxels[idx];
set(idx, val);
edits.erase(idx);
if(delay > 0) {
edits.emplace_back<Edit>({idx, res, delay});
edits.emplace(idx, EditBody{res, delay});
} else {
invalidate(idx);
}

View File

@ -231,7 +231,7 @@ void Universe::pull() {
if (data == nullptr)
return;
if (data->pendingEdits.empty() && peer->queueSize(net::server::queue::EDIT) == 0) {
if (!data->pendingEdits.empty() && peer->queueSize(net::server::queue::EDIT) == 0) {
peer->send(net::PacketWriter::Of(net::server_packet_type::EDITS, data->pendingEdits.front()));
data->pendingEdits.pop();
}
@ -620,8 +620,9 @@ bool Universe::onPacket(net::server::Peer *peer, const data::out_view &buf, net:
if (!packet.read(cpos))
break;
const auto dist = glm::length2(areaOffset - cpos);
if (dist <= glm::pow2(loadDistance) && chunks.inRange(cpos) && chunks.findInRange(cpos).has_value()) {
data->pushChunk(std::make_pair(id, cpos), dist);
if (dist <= glm::pow2(loadDistance) && chunks.inRange(cpos)) {
if (chunks.findInRange(cpos).has_value())
data->pushChunk(std::make_pair(id, cpos), dist);
} else {
LOG_T("Request out of range chunk");
}
@ -692,59 +693,41 @@ bool Universe::isAreaFree(const area_<voxel_pos> &pos, const geometry::Shape sha
world::ItemList Universe::set(const area_<voxel_pos>& pos, int radius, action::Shape shape, const Voxel& val) {
ZoneScopedN("Fill");
ItemList list;
if(const auto it = areas.find(pos.first); it != areas.end()) {
robin_hood::unordered_map<chunk_pos, std::vector<Chunk::Edit>> edits;
auto &chunks = it->second->setChunks();
auto iterator = world::iterator::Get(shape, radius);
world::iterator::pair point;
while (iterator->next(point)) {
const voxel_pos offset = point.first;
const auto split = glm::splitIdx(pos.second + offset);
if(chunks.inRange(split.first)) {
if(const auto chunk = it->second->setChunks().findInRange(split.first)) {
auto ck = std::dynamic_pointer_cast<Chunk>(chunk.value());
auto prev = ck->get(split.second);
const auto next = prev.filled(val, point.second);
if(prev.value != next.value) {
//TODO: apply break table
//TODO: inventory
const auto delay = glm::length2(offset) / radius * .05f;
edits[split.first].push_back(Chunk::Edit{split.second, next, delay});
ck->replace(split.second, next, delay);
}
}
}
const bool stupidClient = host.anyPeer([&](net::server::Peer *peer) {
auto data = peer->getCtx<net_client>();
return data && !data->handleEdits;
});
robin_hood::unordered_map<chunk_pos, std::vector<Chunk::Edit>> edits;
world::iterator::Apply<Chunk>(areas, world::action::FillShape(pos, val, shape, radius),
[&](std::shared_ptr<Chunk>& ck, chunk_pos ck_pos, chunk_voxel_idx idx, Voxel /*prev*/, Voxel next, float delay) {
if (stupidClient)
edits[ck_pos].push_back(Chunk::Edit{next, delay, idx});
//TODO: apply break table
//TODO: inventory
ck->replace(idx, next, delay);
});
if (stupidClient && !edits.empty()) {
ZoneScopedN("Packet");
size_t size = sizeof(area_id);
for(const auto& part: edits) {
size += sizeof(chunk_pos);
size += sizeof(chunk_voxel_idx);
size += sizeof(Chunk::Edit) * part.second.size();
}
bool stupidClient = false;
auto packet = net::PacketWriter(net::server_packet_type::EDITS, size);
packet.write(pos.first);
for(const auto& part: edits) {
packet.write(part.first);
packet.write<chunk_voxel_idx>(part.second.size());
packet.write(part.second.data(), part.second.size() * sizeof(Chunk::Edit));
}
auto buffer = packet.finish();
host.iterPeers([&](net::server::Peer *peer) {
//MAYBE: only in range
auto data = peer->getCtx<net_client>();
if (data && !data->handleEdits)
stupidClient = true;
peer->send(buffer, net::server::queue::CHUNK);
});
if (stupidClient) {
ZoneScopedN("Packet");
size_t size = sizeof(area_id);
for(const auto& part: edits) {
size += sizeof(chunk_pos);
size += sizeof(chunk_voxel_idx);
size += sizeof(Chunk::Edit) * part.second.size();
}
auto packet = net::PacketWriter(net::server_packet_type::EDITS, size);
packet.write(pos.first);
for(const auto& part: edits) {
packet.write(part.first);
packet.write<chunk_voxel_idx>(part.second.size());
packet.write(part.second.data(), part.second.size() * sizeof(Chunk::Edit));
}
auto buffer = packet.finish();
host.iterPeers([&](net::server::Peer *peer) {
//MAYBE: only in range
auto data = peer->getCtx<net_client>();
if (data && !data->handleEdits)
peer->send(buffer, net::server::queue::CHUNK);
});
}
}
return list;
}