1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
#pragma once
#include "optick.config.h"
#if USE_OPTICK
#include <atomic>
#include <mutex>
#include "optick_common.h"
#include "optick_memory.h"
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
namespace Optick
{
// Maps a GPUQueueType value to a C string.
// Only declared here; the definition is not part of this header.
// NOTE(review): presumably returns a human-readable queue label - confirm against the definition.
const char* GetGPUQueueName(GPUQueueType queue);
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class GPUProfiler
{
public:
// NOTE(review): presumably the maximum number of GPU events recorded per frame - inferred from the name, confirm at use sites.
static const int MAX_FRAME_EVENTS = 1024;
// Number of QueryFrame slots kept per node (size of Node::queryGpuframes).
// NOTE(review): presumably the number of frames allowed in flight before results are read back - confirm.
static const int NUM_FRAMES_DELAY = 4;
// Capacity of the per-node timestamp arrays (Node::queryGpuTimestamps / Node::queryCpuTimestamps):
// two queries per frame event, for each of the NUM_FRAMES_DELAY frames (8192 with the values above).
static const int MAX_QUERIES_COUNT = (2 * MAX_FRAME_EVENTS) * NUM_FRAMES_DELAY;
protected:
// Capture state of the profiler; the current value is stored in currentState.
// NOTE(review): the transitions are implemented outside this header, so the meaning of each
// value below is inferred from its name - confirm against Start()/Stop()/Dump().
enum State
{
// Presumably: no capture in progress. This is the zero value of the enum.
STATE_OFF,
// Presumably: a capture was requested and is being set up.
STATE_STARTING,
// Presumably: a capture is active.
STATE_RUNNING,
// Presumably: a capture was stopped and pending data is still being collected.
STATE_FINISHING,
};
// Calibration data used to translate GPU timestamps into CPU timestamps.
// It holds one reference point (timestampCPU, timestampGPU) plus the tick rate of each clock;
// only the ratio frequencyCPU / frequencyGPU matters, so both rates must use the same unit.
struct ClockSynchronization
{
	int64_t frequencyCPU; // tick rate of the CPU clock
	int64_t frequencyGPU; // tick rate of the GPU clock; 0 means "not calibrated"
	int64_t timestampCPU; // CPU clock value at the reference point
	int64_t timestampGPU; // GPU clock value at the reference point

	// Converts a GPU timestamp to the CPU clock domain:
	//   timestampCPU + (gpuTimestamp - timestampGPU) * frequencyCPU / frequencyGPU
	// with the division truncating toward zero.
	//
	// Returns timestampCPU when frequencyGPU is 0 (e.g. a default-constructed object),
	// instead of dividing by zero.
	int64_t GetCPUTimestamp(int64_t gpuTimestamp) const
	{
		if (frequencyGPU == 0)
			return timestampCPU;

		const int64_t delta = gpuTimestamp - timestampGPU;

		// Scale the whole and fractional parts separately. Multiplying the full delta by
		// frequencyCPU first overflows int64_t after only a few seconds when both clocks
		// tick at ~1 GHz; this form yields the same truncated result without the huge
		// intermediate product.
		const int64_t wholePeriods = delta / frequencyGPU;
		const int64_t remainder = delta % frequencyGPU;

		return timestampCPU + wholePeriods * frequencyCPU + remainder * frequencyCPU / frequencyGPU;
	}

	// Starts out uncalibrated: every field is zero.
	ClockSynchronization() : frequencyCPU(0), frequencyGPU(0), timestampCPU(0), timestampGPU(0) {}
};
// Bookkeeping for one frame: the frame's event record plus a start index and a count
// describing the range of timestamp queries associated with it.
struct QueryFrame
{
	EventData* frameEvent;    // event record for the frame; nullptr when the slot is unused
	uint32_t queryIndexStart; // index of the first query; (uint32_t)-1 marks "no queries"
	uint32_t queryIndexCount; // number of queries in the range

	// Puts the slot back into its empty state.
	void Reset()
	{
		queryIndexCount = 0;
		queryIndexStart = static_cast<uint32_t>(-1);
		frameEvent = nullptr;
	}

	// A new slot starts out empty.
	QueryFrame()
	{
		Reset();
	}
};
// Profiling state kept for a single GPU node.
struct Node
{
	array<QueryFrame, NUM_FRAMES_DELAY> queryGpuframes;    // one QueryFrame slot per delayed frame
	array<int64_t, MAX_QUERIES_COUNT> queryGpuTimestamps;  // GPU timestamp per query slot
	array<int64_t*, MAX_QUERIES_COUNT> queryCpuTimestamps; // destination pointer registered per query slot
	std::atomic<uint32_t> queryIndex;                      // monotonically increasing query counter
	ClockSynchronization clock;                            // GPU -> CPU clock calibration for this node
	array<EventStorage*, GPU_QUEUE_COUNT> gpuEventStorage; // one storage pointer per GPU queue type
	string name;

	// Starts the query counter at zero and clears every per-queue storage pointer.
	Node() : queryIndex(0)
	{
		gpuEventStorage.fill(nullptr);
	}

	// Defined out of line.
	void Reset();

	// Atomically claims the next query slot (wrapping at MAX_QUERIES_COUNT), remembers
	// outCpuTimestamp for that slot and returns the slot index.
	// NOTE(review): presumably outCpuTimestamp is where the resolved CPU-domain timestamp
	// gets written later - the code doing so is not in this header.
	uint32_t QueryTimestamp(int64_t* outCpuTimestamp)
	{
		const uint32_t slot = (queryIndex++) % MAX_QUERIES_COUNT;
		queryCpuTimestamps[slot] = outCpuTimestamp;
		return slot;
	}
};
// Recursive mutex owned by the profiler.
// NOTE(review): presumably guards state updates; the lock sites are outside this header.
std::recursive_mutex updateLock;
// Current capture state (see enum State).
// NOTE(review): volatile does not provide inter-thread synchronization in C++. If this field is
// read or written from several threads without holding updateLock, std::atomic<State> would be
// the safe choice - confirm how it is accessed before changing it.
volatile State currentState;
// Per-node data, held through raw pointers.
// NOTE(review): allocation and cleanup of the Node objects are not visible in this header.
vector<Node*> nodes;
// NOTE(review): presumably the index into nodes of the node currently being profiled - confirm.
uint32_t currentNode;
// NOTE(review): presumably a running frame counter - confirm.
uint32_t frameNumber;
// Defined out of line.
void Reset();
// The three helpers below are defined out of line; each returns a reference to a record
// owned elsewhere. NOTE(review): presumably the record is newly appended to an event/tag
// storage - confirm against the implementation.
EventData& AddFrameEvent();
EventData& AddVSyncEvent();
TagData<uint32>& AddFrameTag();
public:
// Defined out of line.
GPUProfiler();
// Init
// Overridable; takes the node's name and its index.
// NOTE(review): presumably sets up nodes[nodeIndex] - confirm against the implementation.
virtual void InitNode(const char* nodeName, uint32_t nodeIndex);
// Capture Controls
// Overridable; each takes a 'mode' value whose meaning is defined by the caller.
// NOTE(review): presumably a bit mask of capture modes - confirm.
virtual void Start(uint32 mode);
virtual void Stop(uint32 mode);
virtual void Dump(uint32 mode);
// Overridable; returns a name string by value.
virtual string GetName() const;
// Interface to implement
// Pure virtual: every concrete (API-specific) profiler must provide these three.
// Returns the clock calibration data for the node with the given index.
virtual ClockSynchronization GetClockSynchronization(uint32_t nodeIndex) = 0;
// Takes an opaque, backend-specific context and a pointer for the CPU-side timestamp.
// NOTE(review): presumably issues a GPU timestamp query whose result is later stored
// through cpuTimestampOut - confirm in the backends.
virtual void QueryTimestamp(void* context, int64_t* cpuTimestampOut) = 0;
// Takes an opaque, backend-specific swap chain handle.
// NOTE(review): presumably called once per presented frame - confirm.
virtual void Flip(void* swapChain) = 0;
// Virtual so that derived profilers can be destroyed through a GPUProfiler pointer.
virtual ~GPUProfiler();
};
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
}
#endif //USE_OPTICK
|