summaryrefslogtreecommitdiffstats
path: root/external/optick/optick_gpu.h
blob: f028f8a50b43259f63f088834631b95db73c2248 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#pragma once
#include "optick.config.h"

#if USE_OPTICK

#include <atomic>
#include <mutex>

#include "optick_common.h"
#include "optick_memory.h"

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
namespace Optick
{
	// Returns a C-string name for the given GPU queue type.
	// Only declared here; the definition (and therefore the lifetime/ownership of the
	// returned pointer) lives outside this header -- presumably static storage, TODO confirm.
	const char* GetGPUQueueName(GPUQueueType queue);
	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	// Abstract base for API-specific GPU profilers.
	// Holds the per-node timestamp query bookkeeping and the capture state; concrete backends
	// implement GetClockSynchronization / QueryTimestamp / Flip (see "Interface to implement").
	class GPUProfiler
	{
	public:
		// Sizing of the timestamp query ring.
		// NOTE(review): the factor 2 presumably accounts for a begin + end timestamp per event,
		// and NUM_FRAMES_DELAY for the number of frames kept in flight -- confirm against the backends.
		static const int MAX_FRAME_EVENTS = 1024;
		static const int NUM_FRAMES_DELAY = 4;
		static const int MAX_QUERIES_COUNT = (2 * MAX_FRAME_EVENTS) * NUM_FRAMES_DELAY;
	protected:

		// Values stored in currentState. The transitions themselves are implemented outside this header.
		enum State
		{
			STATE_OFF,
			STATE_STARTING,
			STATE_RUNNING,
			STATE_FINISHING,
		};

		// A matching pair of CPU/GPU timestamps plus both clock frequencies,
		// used to map GPU timestamps onto the CPU timeline.
		struct ClockSynchronization
		{
			int64_t frequencyCPU;
			int64_t frequencyGPU;
			int64_t timestampCPU;
			int64_t timestampGPU;

			// Converts a GPU timestamp into the CPU clock domain:
			//   timestampCPU + (gpuTimestamp - timestampGPU) * frequencyCPU / frequencyGPU
			// Returns timestampCPU unchanged while the clock is uncalibrated (frequencyGPU == 0).
			int64_t GetCPUTimestamp(int64_t gpuTimestamp) const
			{
				// A default-constructed clock has frequencyGPU == 0; avoid dividing by zero.
				if (frequencyGPU == 0)
					return timestampCPU;

				// Scale the quotient and the remainder of the GPU delta separately.
				// Multiplying the full delta by frequencyCPU first can overflow int64_t on long
				// captures with high-frequency clocks; here the only large product is
				// remainder * frequencyCPU, and |remainder| < |frequencyGPU|.
				// With truncating division the result equals the single-expression formula
				// whenever that formula does not overflow.
				const int64_t gpuDelta = gpuTimestamp - timestampGPU;
				const int64_t wholePeriods = gpuDelta / frequencyGPU;
				const int64_t remainder = gpuDelta % frequencyGPU;
				return timestampCPU + wholePeriods * frequencyCPU + remainder * frequencyCPU / frequencyGPU;
			}

			ClockSynchronization() : frequencyCPU(0), frequencyGPU(0), timestampCPU(0), timestampGPU(0) {}
		};

		// Bookkeeping for one frame: its frame event and the range of query slots it covers.
		struct QueryFrame
		{
			EventData* frameEvent;
			uint32_t queryIndexStart;
			uint32_t queryIndexCount;

			QueryFrame()
			{
				Reset();
			}

			// Clears the frame: no event, zero queries, start index set to (uint32_t)-1
			// (presumably the "no range assigned" sentinel -- confirm against users of this field).
			void Reset()
			{
				frameEvent = nullptr;
				queryIndexStart = (uint32_t)-1;
				queryIndexCount = 0;
			}
		};

		// Per-node state: frame records, timestamp query slots, clock calibration and event storages.
		struct Node
		{
			array<QueryFrame, NUM_FRAMES_DELAY> queryGpuframes;
			array<int64_t, MAX_QUERIES_COUNT> queryGpuTimestamps;
			array<int64_t*, MAX_QUERIES_COUNT> queryCpuTimestamps;
			// Monotonically increasing counter; slot index is counter % MAX_QUERIES_COUNT.
			std::atomic<uint32_t> queryIndex;

			ClockSynchronization clock;

			array<EventStorage*, GPU_QUEUE_COUNT> gpuEventStorage;

			// Atomically reserves the next query slot (wrapping at MAX_QUERIES_COUNT),
			// remembers outCpuTimestamp for that slot and returns the slot index.
			// NOTE(review): outCpuTimestamp is presumably where the resolved CPU-domain
			// timestamp gets written later; that code is not in this header.
			uint32_t QueryTimestamp(int64_t* outCpuTimestamp)
			{
				uint32_t index = queryIndex.fetch_add(1) % MAX_QUERIES_COUNT;
				queryCpuTimestamps[index] = outCpuTimestamp;
				return index;
			}

			string name;

			// Defined outside this header.
			void Reset();

			// Starts with the query counter at 0 and every event storage pointer null.
			Node() : queryIndex(0) { gpuEventStorage.fill(nullptr); }
		};

		// NOTE(review): presumably guards state/node updates; the locking sites are not in this header.
		std::recursive_mutex updateLock;
		volatile State currentState;

		vector<Node*> nodes;
		uint32_t currentNode;

		uint32_t frameNumber;

		// The helpers below are defined outside this header.
		void Reset();

		EventData& AddFrameEvent();
		EventData& AddVSyncEvent();
		TagData<uint32>& AddFrameTag();

	public:
		GPUProfiler();

		// Init
		virtual void InitNode(const char* nodeName, uint32_t nodeIndex);

		// Capture Controls 
		virtual void Start(uint32 mode);
		virtual void Stop(uint32 mode);
		virtual void Dump(uint32 mode);

		virtual string GetName() const;

		// Interface to implement
		// Pure virtuals that every concrete (API-specific) profiler must provide.
		virtual ClockSynchronization GetClockSynchronization(uint32_t nodeIndex) = 0;
		virtual void QueryTimestamp(void* context, int64_t* cpuTimestampOut) = 0;
		virtual void Flip(void* swapChain) = 0;

		virtual ~GPUProfiler();
	};
	////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
}

#endif //USE_OPTICK