#include "TracyDebug.hpp" #include "TracyStringHelpers.hpp" #include "TracySysTrace.hpp" #include "../common/TracySystem.hpp" #ifdef TRACY_HAS_SYSTEM_TRACING #ifndef TRACY_SAMPLING_HZ # if defined _WIN32 # define TRACY_SAMPLING_HZ 8000 # elif defined __linux__ # define TRACY_SAMPLING_HZ 10000 # endif #endif namespace tracy { static int GetSamplingFrequency() { int samplingHz = TRACY_SAMPLING_HZ; auto env = GetEnvVar( "TRACY_SAMPLING_HZ" ); if( env ) { int val = atoi( env ); if( val > 0 ) samplingHz = val; } #if defined _WIN32 return samplingHz > 8000 ? 8000 : ( samplingHz < 1 ? 1 : samplingHz ); #else return samplingHz > 1000000 ? 1000000 : ( samplingHz < 1 ? 1 : samplingHz ); #endif } static int GetSamplingPeriod() { return 1000000000 / GetSamplingFrequency(); } } # if defined _WIN32 # ifndef NOMINMAX # define NOMINMAX # endif # define INITGUID # include # include # include # include # include # include # include "../common/TracyAlloc.hpp" # include "../common/TracySystem.hpp" # include "TracyProfiler.hpp" # include "TracyThread.hpp" # include "windows/TracyETW.cpp" namespace tracy { static DWORD s_pid; extern "C" typedef NTSTATUS (WINAPI *t_NtQueryInformationThread)( HANDLE, THREADINFOCLASS, PVOID, ULONG, PULONG ); extern "C" typedef BOOL (WINAPI *t_EnumProcessModules)( HANDLE, HMODULE*, DWORD, LPDWORD ); extern "C" typedef BOOL (WINAPI *t_GetModuleInformation)( HANDLE, HMODULE, LPMODULEINFO, DWORD ); extern "C" typedef DWORD (WINAPI *t_GetModuleBaseNameA)( HANDLE, HMODULE, LPSTR, DWORD ); extern "C" typedef HRESULT (WINAPI *t_GetThreadDescription)( HANDLE, PWSTR* ); t_NtQueryInformationThread NtQueryInformationThread = (t_NtQueryInformationThread)GetProcAddress( GetModuleHandleA( "ntdll.dll" ), "NtQueryInformationThread" ); t_EnumProcessModules _EnumProcessModules = (t_EnumProcessModules)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "K32EnumProcessModules" ); t_GetModuleInformation _GetModuleInformation = (t_GetModuleInformation)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "K32GetModuleInformation" ); t_GetModuleBaseNameA _GetModuleBaseNameA = (t_GetModuleBaseNameA)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "K32GetModuleBaseNameA" ); static t_GetThreadDescription _GetThreadDescription = 0; void WINAPI EventRecordCallback( PEVENT_RECORD record ) { #ifdef TRACY_ON_DEMAND if( !GetProfiler().IsConnected() ) return; #endif const auto& hdr = record->EventHeader; // WARN: doing a fast switch-match below with the top 32 bits of the GUID // (Data1 is the leading 32bit word of the 128bit GUID). // Ideally, we should be using 'IsEqualGUID()' inside each case match to be // inequivocally sure we are dealing the correct event provider. 
    switch( hdr.ProviderId.Data1 )
    {
    case etw::ThreadGuid.Data1:
        if( hdr.EventDescriptor.Opcode == etw::CSwitch::Opcode )
        {
            const auto cswitch = (const etw::CSwitch*)record->UserData;

            TracyLfqPrepare( QueueType::ContextSwitch );
            MemWrite( &item->contextSwitch.time, hdr.TimeStamp.QuadPart );
            MemWrite( &item->contextSwitch.oldThread, cswitch->oldThreadId );
            MemWrite( &item->contextSwitch.newThread, cswitch->newThreadId );
            MemWrite( &item->contextSwitch.cpu, record->BufferContext.ProcessorNumber );
            MemWrite( &item->contextSwitch.oldThreadWaitReason, cswitch->oldThreadWaitReason );
            MemWrite( &item->contextSwitch.oldThreadState, cswitch->oldThreadState );
            MemWrite( &item->contextSwitch.newThreadPriority, cswitch->newThreadPriority );
            MemWrite( &item->contextSwitch.oldThreadPriority, cswitch->oldThreadPriority );
            MemWrite( &item->contextSwitch.previousCState, cswitch->previousCState );
            TracyLfqCommit;
        }
        else if( hdr.EventDescriptor.Opcode == etw::ReadyThread::Opcode )
        {
            const auto rt = (const etw::ReadyThread*)record->UserData;

            TracyLfqPrepare( QueueType::ThreadWakeup );
            MemWrite( &item->threadWakeup.time, hdr.TimeStamp.QuadPart );
            MemWrite( &item->threadWakeup.cpu, record->BufferContext.ProcessorNumber );
            MemWrite( &item->threadWakeup.thread, rt->threadId );
            MemWrite( &item->threadWakeup.adjustReason, rt->adjustReason );
            MemWrite( &item->threadWakeup.adjustIncrement, rt->adjustIncrement );
            TracyLfqCommit;
        }
        else if( hdr.EventDescriptor.Opcode == etw::ThreadStart::Opcode || hdr.EventDescriptor.Opcode == etw::ThreadDCStart::Opcode )
        {
            const auto ti = (const etw::ThreadInfo*)record->UserData;
            uint64_t tid = ti->threadId;
            if( tid == 0 ) return;
            uint64_t pid = ti->processId;
            TracyLfqPrepare( QueueType::TidToPid );
            MemWrite( &item->tidToPid.tid, tid );
            MemWrite( &item->tidToPid.pid, pid );
            TracyLfqCommit;
        }
        break;
    case etw::StackWalkGuid.Data1:
        if( hdr.EventDescriptor.Opcode == etw::StackWalkEvent::Opcode )
        {
            const auto sw = (const etw::StackWalkEvent*)record->UserData;
            if( sw->stackProcess == s_pid )
            {
                const uint64_t sz = ( record->UserDataLength - 16 ) / 8;
                if( sz > 0 )
                {
                    auto trace = (uint64_t*)tracy_malloc( ( 1 + sz ) * sizeof( uint64_t ) );
                    memcpy( trace, &sz, sizeof( uint64_t ) );
                    memcpy( trace+1, sw->stack, sizeof( uint64_t ) * sz );
                    TracyLfqPrepare( QueueType::CallstackSample );
                    MemWrite( &item->callstackSampleFat.time, sw->eventTimeStamp );
                    MemWrite( &item->callstackSampleFat.thread, sw->stackThread );
                    MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace );
                    TracyLfqCommit;
                }
            }
        }
        break;
    case etw::DxgKrnlGuid.Data1:
        assert( hdr.EventDescriptor.Id == etw::VSyncDPC::EventId );
        {
            const auto vs = (const etw::VSyncDPC*)record->UserData;
            TracyLfqPrepare( QueueType::FrameVsync );
            MemWrite( &item->frameVsync.time, hdr.TimeStamp.QuadPart );
            MemWrite( &item->frameVsync.id, vs->vidPnTargetId );
            TracyLfqCommit;
        }
        break;
    default:
        break;
    }
}

static int GetSamplingInterval()
{
    return GetSamplingPeriod() / 100;
}

static etw::Session session_kernel = {};
static etw::Session session_vsync = {};
static PROCESSTRACE_HANDLE consumer_kernel = INVALID_PROCESSTRACE_HANDLE;
static PROCESSTRACE_HANDLE consumer_vsync = INVALID_PROCESSTRACE_HANDLE;
static Thread* s_threadVsync = nullptr;

bool SysTraceStart( int64_t& samplingPeriod )
{
    if( !_GetThreadDescription ) _GetThreadDescription = (t_GetThreadDescription)GetProcAddress( GetModuleHandleA( "kernel32.dll" ), "GetThreadDescription" );

    s_pid = GetCurrentProcessId();

    if( !etw::CheckAdminPrivilege() ) return false;

    ULONGLONG EnableFlags = 0;
#ifndef TRACY_NO_CONTEXT_SWITCH
    EnableFlags |= EVENT_TRACE_FLAG_CSWITCH;
    EnableFlags |= EVENT_TRACE_FLAG_DISPATCHER;
    EnableFlags |= EVENT_TRACE_FLAG_THREAD;
#endif
#ifndef TRACY_NO_SAMPLING
    DWORD access = etw::ElevatePrivilege( SE_SYSTEM_PROFILE_NAME );
    if( access != ERROR_SUCCESS ) return false;
    EnableFlags |= etw::IsOS64Bit() ? EVENT_TRACE_FLAG_PROFILE : 0;
#endif

    session_kernel = etw::StartSingletonKernelLoggerSession( EnableFlags );
    if( session_kernel.handle == 0 ) return false;

#ifndef TRACY_NO_CONTEXT_SWITCH
    if( etw::EnableStackWalk( session_kernel, etw::ThreadGuid, etw::CSwitch::Opcode ) != ERROR_SUCCESS ) return etw::StopSession( session_kernel ), false;
#endif
#ifndef TRACY_NO_SAMPLING
    if( etw::EnableStackWalk( session_kernel, etw::PerfInfoGuid, etw::SampledProfile::Opcode ) != ERROR_SUCCESS ) return etw::StopSession( session_kernel ), false;

    int microseconds = GetSamplingInterval() / 10;
    if( etw::SetCPUProfilingInterval( microseconds ) != ERROR_SUCCESS ) return etw::StopSession( session_kernel ), false;
    samplingPeriod = GetSamplingPeriod();
#endif

    consumer_kernel = etw::SetupEventConsumer( session_kernel, EventRecordCallback );
    if( consumer_kernel == INVALID_PROCESSTRACE_HANDLE ) return etw::StopSession( session_kernel ), false;

#ifndef TRACY_NO_VSYNC_CAPTURE
    session_vsync = etw::StartUserSession( "TracyVsync" );
    if( session_vsync.handle != 0 )
    {
        if( etw::EnableVSyncMonitoring( session_vsync ) != ERROR_SUCCESS )
        {
            etw::StopSession( session_vsync );
        }
        else
        {
            consumer_vsync = etw::SetupEventConsumer( session_vsync, EventRecordCallback );
            if( consumer_vsync != INVALID_PROCESSTRACE_HANDLE )
            {
                s_threadVsync = (Thread*)tracy_malloc( sizeof( Thread ) );
                new(s_threadVsync) Thread( [] (void*) {
                    ThreadExitHandler threadExitHandler;
                    SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL );
                    SetThreadName( "Tracy Vsync (ETW)" );
                    etw::EventConsumerLoop( consumer_vsync );
                }, nullptr );
            }
        }
    }
#endif

    return true;
}

void SysTraceStop()
{
    if( s_threadVsync )
    {
        etw::StopEventConsumer( consumer_vsync );
        etw::StopSession( session_vsync );
        s_threadVsync->~Thread();
        tracy_free( s_threadVsync );
    }

    etw::StopEventConsumer( consumer_kernel );
    etw::StopSession( session_kernel );
}

void SysTraceWorker( void* ptr )
{
    ThreadExitHandler threadExitHandler;
    SetThreadPriority( GetCurrentThread(), THREAD_PRIORITY_TIME_CRITICAL );
    SetThreadName( "Tracy SysTrace (ETW)" );
    etw::EventConsumerLoop( consumer_kernel );
}

void SysTraceGetExternalName( uint64_t thread, const char*& threadName, const char*& name )
{
    bool threadSent = false;
    auto hnd = OpenThread( THREAD_QUERY_INFORMATION, FALSE, DWORD( thread ) );
    if( hnd == 0 )
    {
        hnd = OpenThread( THREAD_QUERY_LIMITED_INFORMATION, FALSE, DWORD( thread ) );
    }
    if( hnd != 0 )
    {
        if( _GetThreadDescription )
        {
            PWSTR tmp;
            if( SUCCEEDED( _GetThreadDescription( hnd, &tmp ) ) )
            {
                char buf[256];
                auto ret = wcstombs( buf, tmp, 256 );
                LocalFree( tmp );
                if( ret != 0 )
                {
                    threadName = CopyString( buf, ret );
                    threadSent = true;
                }
            }
        }
        const auto pid = GetProcessIdOfThread( hnd );
        if( !threadSent && NtQueryInformationThread && _EnumProcessModules && _GetModuleInformation && _GetModuleBaseNameA )
        {
            void* ptr;
            ULONG retlen;
            auto status = NtQueryInformationThread( hnd, (THREADINFOCLASS)9 /*ThreadQuerySetWin32StartAddress*/, &ptr, sizeof( &ptr ), &retlen );
            if( status == 0 )
            {
                const auto phnd = OpenProcess( PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, pid );
                if( phnd != INVALID_HANDLE_VALUE )
                {
                    MEMORY_BASIC_INFORMATION vmeminfo;
                    SIZE_T infosize = VirtualQueryEx( phnd, ptr, &vmeminfo, sizeof( vmeminfo ) );
&vmeminfo, sizeof( vmeminfo ) ); if( infosize == sizeof( vmeminfo ) ) { if (vmeminfo.Type == MEM_IMAGE) { // for MEM_IMAGE regions, vmeminfo.AllocationBase _is_ the HMODULE HMODULE mod = (HMODULE)vmeminfo.AllocationBase; MODULEINFO info; if( _GetModuleInformation( phnd, mod, &info, sizeof( info ) ) != 0 ) { char buf2[1024]; const auto modlen = _GetModuleBaseNameA( phnd, mod, buf2, 1024 ); if( modlen != 0 ) { threadName = CopyString( buf2, modlen ); threadSent = true; } } } } CloseHandle( phnd ); } } } CloseHandle( hnd ); if( !threadSent ) { threadName = CopyString( "???", 3 ); threadSent = true; } if( pid != 0 ) { { uint64_t _pid = pid; TracyLfqPrepare( QueueType::TidToPid ); MemWrite( &item->tidToPid.tid, thread ); MemWrite( &item->tidToPid.pid, _pid ); TracyLfqCommit; } if( pid == 4 ) { name = CopyStringFast( "System", 6 ); return; } else { const auto phnd = OpenProcess( PROCESS_QUERY_LIMITED_INFORMATION, FALSE, pid ); if( phnd != INVALID_HANDLE_VALUE ) { char buf2[1024]; const auto sz = GetProcessImageFileNameA( phnd, buf2, 1024 ); CloseHandle( phnd ); if( sz != 0 ) { auto ptr = buf2 + sz - 1; while( ptr > buf2 && *ptr != '\\' ) ptr--; if( *ptr == '\\' ) ptr++; name = CopyStringFast( ptr ); return; } } } } } if( !threadSent ) { threadName = CopyString( "???", 3 ); } name = CopyStringFast( "???", 3 ); } } # elif defined __linux__ # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # include # if defined __i386 || defined __x86_64__ # include "TracyCpuid.hpp" # endif # include "TracyProfiler.hpp" # include "TracyRingBuffer.hpp" # include "TracyThread.hpp" namespace tracy { static std::atomic traceActive { false }; static int s_numCpus = 0; static int s_numBuffers = 0; static int s_ctxBufferIdx = 0; static RingBuffer* s_ring = nullptr; static const int ThreadHashSize = 4 * 1024; static uint32_t s_threadHash[ThreadHashSize] = {}; static bool CurrentProcOwnsThread( uint32_t tid ) { const auto hash = tid & ( ThreadHashSize-1 ); const auto hv = s_threadHash[hash]; if( hv == tid ) return true; if( hv == -tid ) return false; char path[256]; sprintf( path, "/proc/self/task/%d", tid ); struct stat st; if( stat( path, &st ) == 0 ) { s_threadHash[hash] = tid; return true; } else { s_threadHash[hash] = -tid; return false; } } static int perf_event_open( struct perf_event_attr* hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags ) { return syscall( __NR_perf_event_open, hw_event, pid, cpu, group_fd, flags ); } enum TraceEventId { EventCallstack, EventCpuCycles, EventInstructionsRetired, EventCacheReference, EventCacheMiss, EventBranchRetired, EventBranchMiss, EventVsync, EventContextSwitch, EventWaking, }; static void ProbePreciseIp( perf_event_attr& pe, unsigned long long config0, unsigned long long config1, pid_t pid ) { pe.config = config1; pe.precise_ip = 3; while( pe.precise_ip != 0 ) { const int fd = perf_event_open( &pe, pid, 0, -1, PERF_FLAG_FD_CLOEXEC ); if( fd != -1 ) { close( fd ); break; } pe.precise_ip--; } pe.config = config0; while( pe.precise_ip != 0 ) { const int fd = perf_event_open( &pe, pid, 0, -1, PERF_FLAG_FD_CLOEXEC ); if( fd != -1 ) { close( fd ); break; } pe.precise_ip--; } TracyDebug( " Probed precise_ip: %i", pe.precise_ip ); } static void ProbePreciseIp( perf_event_attr& pe, pid_t pid ) { pe.precise_ip = 3; while( pe.precise_ip != 0 ) { const int fd = perf_event_open( &pe, pid, 0, -1, PERF_FLAG_FD_CLOEXEC ); if( fd != -1 ) { close( fd 
        pe.precise_ip--;
    }
    TracyDebug( " Probed precise_ip: %i", pe.precise_ip );
}

static bool IsGenuineIntel()
{
#if defined __i386 || defined __x86_64__
    uint32_t regs[4] = {};
    __get_cpuid( 0, regs, regs+1, regs+2, regs+3 );
    char manufacturer[12];
    memcpy( manufacturer, regs+1, 4 );
    memcpy( manufacturer+4, regs+3, 4 );
    memcpy( manufacturer+8, regs+2, 4 );
    return memcmp( manufacturer, "GenuineIntel", 12 ) == 0;
#else
    return false;
#endif
}

static const char* ReadFile( const char* path )
{
    int fd = open( path, O_RDONLY );
    if( fd < 0 ) return nullptr;

    static char tmp[64];
    const auto cnt = read( fd, tmp, 63 );
    close( fd );
    if( cnt < 0 ) return nullptr;
    tmp[cnt] = '\0';
    return tmp;
}

static const char* ReadFile( const char* base, const char* path )
{
    const auto blen = strlen( base );
    const auto plen = strlen( path );
    auto tmp = (char*)tracy_malloc( blen + plen + 1 );
    memcpy( tmp, base, blen );
    memcpy( tmp + blen, path, plen );
    tmp[blen+plen] = '\0';
    auto res = ReadFile( tmp );
    tracy_free( tmp );
    return res;
}

static char* GetTraceFsPath()
{
    auto f = setmntent( "/proc/mounts", "r" );
    if( !f ) return nullptr;
    char* ret = nullptr;
    while( auto ent = getmntent( f ) )
    {
        if( strcmp( ent->mnt_fsname, "tracefs" ) == 0 )
        {
            auto len = strlen( ent->mnt_dir );
            ret = (char*)tracy_malloc( len + 1 );
            memcpy( ret, ent->mnt_dir, len );
            ret[len] = '\0';
            break;
        }
    }
    endmntent( f );
    return ret;
}

bool SysTraceStart( int64_t& samplingPeriod )
{
#ifndef CLOCK_MONOTONIC_RAW
    return false;
#endif

    const auto paranoidLevelStr = ReadFile( "/proc/sys/kernel/perf_event_paranoid" );
    if( !paranoidLevelStr ) return false;
#ifdef TRACY_VERBOSE
    int paranoidLevel = 2;
    paranoidLevel = atoi( paranoidLevelStr );
    TracyDebug( "perf_event_paranoid: %i", paranoidLevel );
#endif

    auto traceFsPath = GetTraceFsPath();
    if( !traceFsPath ) return false;
    TracyDebug( "tracefs path: %s", traceFsPath );

    int switchId = -1, wakingId = -1, vsyncId = -1;
    const auto switchIdStr = ReadFile( traceFsPath, "/events/sched/sched_switch/id" );
    if( switchIdStr ) switchId = atoi( switchIdStr );
    const auto wakingIdStr = ReadFile( traceFsPath, "/events/sched/sched_waking/id" );
    if( wakingIdStr ) wakingId = atoi( wakingIdStr );
    const auto vsyncIdStr = ReadFile( traceFsPath, "/events/drm/drm_vblank_event/id" );
    if( vsyncIdStr ) vsyncId = atoi( vsyncIdStr );
    tracy_free( traceFsPath );

    TracyDebug( "sched_switch id: %i", switchId );
    TracyDebug( "sched_waking id: %i", wakingId );
    TracyDebug( "drm_vblank_event id: %i", vsyncId );

#ifdef TRACY_NO_SAMPLING
    const bool noSoftwareSampling = true;
#else
    const char* noSoftwareSamplingEnv = GetEnvVar( "TRACY_NO_SAMPLING" );
    const bool noSoftwareSampling = noSoftwareSamplingEnv && noSoftwareSamplingEnv[0] == '1';
#endif
#ifdef TRACY_NO_SAMPLE_RETIREMENT
    const bool noRetirement = true;
#else
    const char* noRetirementEnv = GetEnvVar( "TRACY_NO_SAMPLE_RETIREMENT" );
    const bool noRetirement = noRetirementEnv && noRetirementEnv[0] == '1';
#endif
#ifdef TRACY_NO_SAMPLE_CACHE
    const bool noCache = true;
#else
    const char* noCacheEnv = GetEnvVar( "TRACY_NO_SAMPLE_CACHE" );
    const bool noCache = noCacheEnv && noCacheEnv[0] == '1';
#endif
#ifdef TRACY_NO_SAMPLE_BRANCH
    const bool noBranch = true;
#else
    const char* noBranchEnv = GetEnvVar( "TRACY_NO_SAMPLE_BRANCH" );
    const bool noBranch = noBranchEnv && noBranchEnv[0] == '1';
#endif
#ifdef TRACY_NO_CONTEXT_SWITCH
    const bool noCtxSwitch = true;
#else
    const char* noCtxSwitchEnv = GetEnvVar( "TRACY_NO_CONTEXT_SWITCH" );
    const bool noCtxSwitch = noCtxSwitchEnv && noCtxSwitchEnv[0] == '1';
#endif
#ifdef TRACY_NO_VSYNC_CAPTURE
    const bool noVsync = true;
#else
    const char* noVsyncEnv = GetEnvVar( "TRACY_NO_VSYNC_CAPTURE" );
    const bool noVsync = noVsyncEnv && noVsyncEnv[0] == '1';
#endif

    samplingPeriod = GetSamplingPeriod();
    uint32_t currentPid = (uint32_t)getpid();

    s_numCpus = (int)std::thread::hardware_concurrency();

    const auto maxNumBuffers = s_numCpus * (
        1 +     // software sampling
        2 +     // CPU cycles + instructions retired
        2 +     // cache reference + miss
        2 +     // branch retired + miss
        2 +     // context switches + waking ups
        1       // vsync
    );
    s_ring = (RingBuffer*)tracy_malloc( sizeof( RingBuffer ) * maxNumBuffers );
    s_numBuffers = 0;

    // software sampling
    perf_event_attr pe = {};
    pe.type = PERF_TYPE_SOFTWARE;
    pe.size = sizeof( perf_event_attr );
    pe.config = PERF_COUNT_SW_CPU_CLOCK;
    pe.sample_freq = GetSamplingFrequency();
    pe.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_CALLCHAIN;
#if LINUX_VERSION_CODE >= KERNEL_VERSION( 4, 8, 0 )
    pe.sample_max_stack = 127;
#endif
    pe.disabled = 1;
    pe.freq = 1;
    pe.inherit = 1;
#if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
    pe.use_clockid = 1;
    pe.clockid = CLOCK_MONOTONIC_RAW;
#endif

    if( !noSoftwareSampling )
    {
        TracyDebug( "Setup software sampling" );
        ProbePreciseIp( pe, currentPid );
        for( int i=0; i= KERNEL_VERSION( 4, 8, 0 )
    pe.sample_max_stack = 127;
#endif
    pe.disabled = 1;
    pe.inherit = 1;
    pe.config = switchId;
#if !defined TRACY_HW_TIMER || !( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
    pe.use_clockid = 1;
    pe.clockid = CLOCK_MONOTONIC_RAW;
#endif

    TracyDebug( "Setup context switch capture" );
    for( int i=0; i> 63;
        const auto m2 = test >> 47;
        if( m1 == m2 ) break;
    }
    while( --cnt > 0 );

    for( uint64_t j=1; j<=cnt; j++ )
    {
        const auto test = (int64_t)trace[j];
        const auto m1 = test >> 63;
        const auto m2 = test >> 47;
        if( m1 != m2 ) trace[j] = 0;
    }
#endif

    for( uint64_t j=1; j<=cnt; j++ )
    {
        if( trace[j] >= (uint64_t)-4095 )   // PERF_CONTEXT_MAX
        {
            memmove( trace+j, trace+j+1, sizeof( uint64_t ) * ( cnt - j ) );
            cnt--;
        }
    }

    memcpy( trace, &cnt, sizeof( uint64_t ) );
    return trace;
}

void SysTraceWorker( void* ptr )
{
    ThreadExitHandler threadExitHandler;
    SetThreadName( "Tracy Sampling" );
    InitRpmalloc();
    sched_param sp = { 99 };
    if( pthread_setschedparam( pthread_self(), SCHED_FIFO, &sp ) != 0 ) TracyDebug( "Failed to increase SysTraceWorker thread priority!" );
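
    // The worker drains the sampling ring buffers first (call stacks and hardware
    // counter samples), then merges the context-switch/waking/vsync buffers by always
    // picking the buffer holding the earliest timestamp, so those events are emitted
    // in chronological order.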
    auto ctxBufferIdx = s_ctxBufferIdx;
    auto ringArray = s_ring;
    auto numBuffers = s_numBuffers;

    for( int i=0; i tail );
        hadData = true;
        const auto id = ring.GetId();
        assert( id != EventContextSwitch );
        const auto end = head - tail;
        uint64_t pos = 0;
        if( id == EventCallstack )
        {
            while( pos < end )
            {
                perf_event_header hdr;
                ring.Read( &hdr, pos, sizeof( perf_event_header ) );
                if( hdr.type == PERF_RECORD_SAMPLE )
                {
                    auto offset = pos + sizeof( perf_event_header );

                    // Layout:
                    //   u32 pid, tid
                    //   u64 time
                    //   u64 cnt
                    //   u64 ip[cnt]

                    uint32_t tid;
                    uint64_t t0;
                    uint64_t cnt;

                    offset += sizeof( uint32_t );
                    ring.Read( &tid, offset, sizeof( uint32_t ) );
                    offset += sizeof( uint32_t );
                    ring.Read( &t0, offset, sizeof( uint64_t ) );
                    offset += sizeof( uint64_t );
                    ring.Read( &cnt, offset, sizeof( uint64_t ) );
                    offset += sizeof( uint64_t );

                    if( cnt > 0 )
                    {
#if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
                        t0 = ring.ConvertTimeToTsc( t0 );
#endif
                        auto trace = GetCallstackBlock( cnt, ring, offset );

                        TracyLfqPrepare( QueueType::CallstackSample );
                        MemWrite( &item->callstackSampleFat.time, t0 );
                        MemWrite( &item->callstackSampleFat.thread, tid );
                        MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace );
                        TracyLfqCommit;
                    }
                }
                pos += hdr.size;
            }
        }
        else
        {
            while( pos < end )
            {
                perf_event_header hdr;
                ring.Read( &hdr, pos, sizeof( perf_event_header ) );
                if( hdr.type == PERF_RECORD_SAMPLE )
                {
                    auto offset = pos + sizeof( perf_event_header );

                    // Layout:
                    //   u64 ip
                    //   u64 time

                    uint64_t ip, t0;
                    ring.Read( &ip, offset, sizeof( uint64_t ) );
                    offset += sizeof( uint64_t );
                    ring.Read( &t0, offset, sizeof( uint64_t ) );

#if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
                    t0 = ring.ConvertTimeToTsc( t0 );
#endif
                    QueueType type;
                    switch( id )
                    {
                    case EventCpuCycles: type = QueueType::HwSampleCpuCycle; break;
                    case EventInstructionsRetired: type = QueueType::HwSampleInstructionRetired; break;
                    case EventCacheReference: type = QueueType::HwSampleCacheReference; break;
                    case EventCacheMiss: type = QueueType::HwSampleCacheMiss; break;
                    case EventBranchRetired: type = QueueType::HwSampleBranchRetired; break;
                    case EventBranchMiss: type = QueueType::HwSampleBranchMiss; break;
                    default: abort();
                    }

                    TracyLfqPrepare( type );
                    MemWrite( &item->hwSample.ip, ip );
                    MemWrite( &item->hwSample.time, t0 );
                    TracyLfqCommit;
                }
                pos += hdr.size;
            }
        }
        assert( pos == end );
        ring.Advance( end );
    }

    if( !traceActive.load( std::memory_order_relaxed ) ) break;

    if( ctxBufferIdx != numBuffers )
    {
        const auto ctxBufNum = numBuffers - ctxBufferIdx;

        int activeNum = 0;
        uint16_t active[512];
        uint32_t end[512];
        uint32_t pos[512];
        for( int i=0; i 0 )
        {
            hadData = true;
            while( activeNum > 0 )
            {
                // Find the earliest event from the active buffers
                int sel = -1;
                int selPos;
                int64_t t0 = std::numeric_limits<int64_t>::max();
                for( int i=0; i= 0 )
                {
                    auto& ring = ringArray[ctxBufferIdx + sel];
                    auto rbPos = pos[sel];
                    auto offset = rbPos;
                    perf_event_header hdr;
                    ring.Read( &hdr, offset, sizeof( perf_event_header ) );
#if defined TRACY_HW_TIMER && ( defined __i386 || defined _M_IX86 || defined __x86_64__ || defined _M_X64 )
                    t0 = ring.ConvertTimeToTsc( t0 );
#endif
                    const auto rid = ring.GetId();
                    if( rid == EventContextSwitch )
                    {
                        // Layout: See /sys/kernel/debug/tracing/events/sched/sched_switch/format
                        //   u64 time        // PERF_SAMPLE_TIME
                        //   u64 cnt         // PERF_SAMPLE_CALLCHAIN
                        //   u64 ip[cnt]     // PERF_SAMPLE_CALLCHAIN
                        //   u32 size
                        //   u8  data[size]
                        // Data (not ABI stable, but has not changed since it was added, in 2009):
                        //     u8  hdr[8]
                        //     u8  prev_comm[16]
                        //     u32 prev_pid
                        //     u32 prev_prio
                        //     lng prev_state
                        //     u8  next_comm[16]
                        //     u32 next_pid
                        //     u32 next_prio

                        offset += sizeof( perf_event_header ) + sizeof( uint64_t );
                        uint64_t cnt;
                        ring.Read( &cnt, offset, sizeof( uint64_t ) );
                        offset += sizeof( uint64_t );
                        const auto traceOffset = offset;
                        offset += sizeof( uint64_t ) * cnt + sizeof( uint32_t ) + 8 + 16;

                        uint32_t prev_pid, prev_prio;
                        uint32_t next_pid, next_prio;
                        long prev_state;

                        ring.Read( &prev_pid, offset, sizeof( uint32_t ) );
                        offset += sizeof( uint32_t );
                        ring.Read( &prev_prio, offset, sizeof( uint32_t ) );
                        offset += sizeof( uint32_t );
                        ring.Read( &prev_state, offset, sizeof( long ) );
                        offset += sizeof( long ) + 16;
                        ring.Read( &next_pid, offset, sizeof( uint32_t ) );
                        offset += sizeof( uint32_t );
                        ring.Read( &next_prio, offset, sizeof( uint32_t ) );

                        uint8_t oldThreadWaitReason = 100;
                        uint8_t oldThreadState;
                        if( prev_state & 0x0001 ) oldThreadState = 104;
                        else if( prev_state & 0x0002 ) oldThreadState = 101;
                        else if( prev_state & 0x0004 ) oldThreadState = 105;
                        else if( prev_state & 0x0008 ) oldThreadState = 106;
                        else if( prev_state & 0x0010 ) oldThreadState = 108;
                        else if( prev_state & 0x0020 ) oldThreadState = 109;
                        else if( prev_state & 0x0040 ) oldThreadState = 110;
                        else if( prev_state & 0x0080 ) oldThreadState = 102;
                        else oldThreadState = 103;

                        TracyLfqPrepare( QueueType::ContextSwitch );
                        MemWrite( &item->contextSwitch.time, t0 );
                        MemWrite( &item->contextSwitch.oldThread, prev_pid );
                        MemWrite( &item->contextSwitch.newThread, next_pid );
                        MemWrite( &item->contextSwitch.cpu, uint8_t( ring.GetCpu() ) );
                        MemWrite( &item->contextSwitch.oldThreadWaitReason, oldThreadWaitReason );
                        MemWrite( &item->contextSwitch.oldThreadState, oldThreadState );
                        MemWrite( &item->contextSwitch.previousCState, uint8_t( 0 ) );
                        MemWrite( &item->contextSwitch.newThreadPriority, int8_t( next_prio ) );
                        MemWrite( &item->contextSwitch.oldThreadPriority, int8_t( prev_prio ) );
                        TracyLfqCommit;

                        if( cnt > 0 && prev_pid != 0 && CurrentProcOwnsThread( prev_pid ) )
                        {
                            auto trace = GetCallstackBlock( cnt, ring, traceOffset );
                            TracyLfqPrepare( QueueType::CallstackSampleContextSwitch );
                            MemWrite( &item->callstackSampleFat.time, t0 );
                            MemWrite( &item->callstackSampleFat.thread, prev_pid );
                            MemWrite( &item->callstackSampleFat.ptr, (uint64_t)trace );
                            TracyLfqCommit;
                        }
                    }
                    else if( rid == EventWaking )
                    {
                        // See /sys/kernel/debug/tracing/events/sched/sched_waking/format
                        // Layout:
                        //   u64 time        // PERF_SAMPLE_TIME
                        //   u32 size
                        //   u8  data[size]
                        // Data:
                        //     u8  hdr[8]
                        //     u8  comm[16]
                        //     u32 pid
                        //     i32 prio
                        //     i32 target_cpu

                        const uint32_t dataOffset = sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t );
                        offset += dataOffset + 8 + 16;

                        uint32_t pid;
                        ring.Read( &pid, offset, sizeof( uint32_t ) );

                        TracyLfqPrepare( QueueType::ThreadWakeup );
                        MemWrite( &item->threadWakeup.time, t0 );
                        MemWrite( &item->threadWakeup.thread, pid );
                        MemWrite( &item->threadWakeup.cpu, (uint8_t)ring.GetCpu() );
                        int8_t adjustReason = -1;   // Does not exist on Linux
                        int8_t adjustIncrement = 0; // Should perhaps store the new prio?
MemWrite( &item->threadWakeup.adjustReason, adjustReason ); MemWrite( &item->threadWakeup.adjustIncrement, adjustIncrement ); TracyLfqCommit; } else { assert( rid == EventVsync ); // Layout: // u64 time // u32 size // u8 data[size] // Data (not ABI stable): // u8 hdr[8] // i32 crtc // u32 seq // i64 ktime // u8 high precision offset += sizeof( perf_event_header ) + sizeof( uint64_t ) + sizeof( uint32_t ) + 8; int32_t crtc; ring.Read( &crtc, offset, sizeof( int32_t ) ); // Note: The timestamp value t0 might be off by a number of microseconds from the // true hardware vblank event. The ktime value should be used instead, but it is // measured in CLOCK_MONOTONIC time. Tracy only supports the timestamp counter // register (TSC) or CLOCK_MONOTONIC_RAW clock. #if 0 offset += sizeof( uint32_t ) * 2; int64_t ktime; ring.Read( &ktime, offset, sizeof( int64_t ) ); #endif TracyLfqPrepare( QueueType::FrameVsync ); MemWrite( &item->frameVsync.id, crtc ); MemWrite( &item->frameVsync.time, t0 ); TracyLfqCommit; } rbPos += hdr.size; if( rbPos == end[sel] ) { memmove( active+selPos, active+selPos+1, sizeof(*active) * ( activeNum - selPos - 1 ) ); activeNum--; } else { pos[sel] = rbPos; } } } for( int i=0; i 0 && buf[sz-1] == '\n' ) buf[sz-1] = '\0'; threadName = CopyString( buf ); fclose( f ); } else { threadName = CopyString( "???", 3 ); } sprintf( fn, "/proc/%" PRIu64 "/status", thread ); f = fopen( fn, "rb" ); if( f ) { char* tmp = (char*)tracy_malloc_fast( 8*1024 ); const auto fsz = (ptrdiff_t)fread( tmp, 1, 8*1024, f ); fclose( f ); int pid = -1; auto line = tmp; for(;;) { if( memcmp( "Tgid:\t", line, 6 ) == 0 ) { pid = atoi( line + 6 ); break; } while( line - tmp < fsz && *line != '\n' ) line++; if( *line != '\n' ) break; line++; } tracy_free_fast( tmp ); if( pid >= 0 ) { { uint64_t _pid = pid; TracyLfqPrepare( QueueType::TidToPid ); MemWrite( &item->tidToPid.tid, thread ); MemWrite( &item->tidToPid.pid, _pid ); TracyLfqCommit; } sprintf( fn, "/proc/%i/comm", pid ); f = fopen( fn, "rb" ); if( f ) { char buf[256]; const auto sz = fread( buf, 1, 256, f ); if( sz > 0 && buf[sz-1] == '\n' ) buf[sz-1] = '\0'; name = CopyStringFast( buf ); fclose( f ); return; } } } name = CopyStringFast( "???", 3 ); } } # endif #endif