diff options
author | Fernando Sahmkow <fsahmkow27@gmail.com> | 2023-08-06 09:38:16 +0200 |
---|---|---|
committer | Fernando Sahmkow <fsahmkow27@gmail.com> | 2023-09-23 23:05:30 +0200 |
commit | 282ae8fa51e060e6d4ef026b734aa871b1b9331e (patch) | |
tree | 3bc4603b6add0582315dc65544f1986427e4182d | |
parent | QueryCache: Implement dependant queries. (diff) | |
download | yuzu-282ae8fa51e060e6d4ef026b734aa871b1b9331e.tar yuzu-282ae8fa51e060e6d4ef026b734aa871b1b9331e.tar.gz yuzu-282ae8fa51e060e6d4ef026b734aa871b1b9331e.tar.bz2 yuzu-282ae8fa51e060e6d4ef026b734aa871b1b9331e.tar.lz yuzu-282ae8fa51e060e6d4ef026b734aa871b1b9331e.tar.xz yuzu-282ae8fa51e060e6d4ef026b734aa871b1b9331e.tar.zst yuzu-282ae8fa51e060e6d4ef026b734aa871b1b9331e.zip |
21 files changed, 270 insertions, 214 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index f91b7d1e4..9e90c587c 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -276,9 +276,8 @@ std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_ad } template <class P> -std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer(VAddr cpu_addr, u32 size, - ObtainBufferSynchronize sync_info, - ObtainBufferOperation post_op) { +std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainCPUBuffer( + VAddr cpu_addr, u32 size, ObtainBufferSynchronize sync_info, ObtainBufferOperation post_op) { const BufferId buffer_id = FindBuffer(cpu_addr, size); Buffer& buffer = slot_buffers[buffer_id]; diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index 9507071e5..c4f6e8d12 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -297,8 +297,8 @@ public: ObtainBufferOperation post_op); [[nodiscard]] std::pair<Buffer*, u32> ObtainCPUBuffer(VAddr gpu_addr, u32 size, - ObtainBufferSynchronize sync_info, - ObtainBufferOperation post_op); + ObtainBufferSynchronize sync_info, + ObtainBufferOperation post_op); void FlushCachedWrites(); /// Return true when there are uncommitted buffers to be downloaded diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 922c399e6..46b9c548a 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -596,12 +596,6 @@ void Maxwell3D::ProcessCounterReset() { case Regs::ClearReport::ZPassPixelCount: rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); break; - case Regs::ClearReport::PrimitivesGenerated: - rasterizer->ResetCounter(VideoCommon::QueryType::StreamingByteCount); - break; - case Regs::ClearReport::VtgPrimitivesOut: - rasterizer->ResetCounter(VideoCommon::QueryType::StreamingByteCount); - break; default: LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", regs.clear_report_value); break; diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index 582738234..8dd34c04a 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp @@ -82,7 +82,8 @@ void Puller::ProcessSemaphoreTriggerMethod() { if (op == GpuSemaphoreOperation::WriteLong) { const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; const u32 payload = regs.semaphore_sequence; - rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); + rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, + VideoCommon::QueryPropertiesFlags::HasTimeout, payload, 0); } else { do { const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; @@ -117,7 +118,8 @@ void Puller::ProcessSemaphoreTriggerMethod() { void Puller::ProcessSemaphoreRelease() { const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; const u32 payload = regs.semaphore_release; - rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0); + rasterizer->Query(sequence_address, VideoCommon::QueryType::Payload, + VideoCommon::QueryPropertiesFlags::IsAFence, payload, 0); } void Puller::ProcessSemaphoreAcquire() { diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index 8459a3092..805a89900 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h @@ -55,6 +55,9 @@ public: // Unlike other fences, this one doesn't void SignalOrdering() { + if constexpr (!can_async_check) { + TryReleasePendingFences<false>(); + } std::scoped_lock lock{buffer_cache.mutex}; buffer_cache.AccumulateFlushes(); } @@ -104,13 +107,9 @@ public: SignalFence(std::move(func)); } - void WaitPendingFences(bool force) { + void WaitPendingFences([[maybe_unused]] bool force) { if constexpr (!can_async_check) { - if (force) { - TryReleasePendingFences<true>(); - } else { - TryReleasePendingFences<false>(); - } + TryReleasePendingFences<true>(); } else { if (!force) { return; @@ -125,7 +124,8 @@ public: }); SignalFence(std::move(func)); std::unique_lock lk(wait_mutex); - wait_cv.wait(lk, [&wait_finished] { return wait_finished.load(std::memory_order_relaxed); }); + wait_cv.wait( + lk, [&wait_finished] { return wait_finished.load(std::memory_order_relaxed); }); } } diff --git a/src/video_core/query_cache/bank_base.h b/src/video_core/query_cache/bank_base.h index 4246a609d..420927091 100644 --- a/src/video_core/query_cache/bank_base.h +++ b/src/video_core/query_cache/bank_base.h @@ -7,21 +7,19 @@ #include <deque> #include <utility> - #include "common/common_types.h" namespace VideoCommon { class BankBase { protected: - const size_t base_bank_size; - size_t bank_size; - std::atomic<size_t> references; - size_t current_slot; + const size_t base_bank_size{}; + size_t bank_size{}; + std::atomic<size_t> references{}; + size_t current_slot{}; public: - BankBase(size_t bank_size_) - : base_bank_size{bank_size_}, bank_size(bank_size_), references(0), current_slot(0) {} + explicit BankBase(size_t bank_size_) : base_bank_size{bank_size_}, bank_size(bank_size_) {} virtual ~BankBase() = default; @@ -58,11 +56,11 @@ public: bank_size = current_slot; } - constexpr bool IsClosed() { + bool IsClosed() const { return current_slot >= bank_size; } - bool IsDead() { + bool IsDead() const { return IsClosed() && references == 0; } }; diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h index 0ae23af9f..993a13eac 100644 --- a/src/video_core/query_cache/query_base.h +++ b/src/video_core/query_cache/query_base.h @@ -9,28 +9,28 @@ namespace VideoCommon { enum class QueryFlagBits : u32 { - HasTimestamp = 1 << 0, ///< Indicates if this query has a tiemstamp. - IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host - IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host - IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest. - IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query - IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query - IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified. - IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. - IsFence = 1 << 8, ///< Indicates the query is a fence. - IsQueuedForAsyncFlush = 1 <<9,///< Indicates that the query can be flushed at any moment + HasTimestamp = 1 << 0, ///< Indicates if this query has a timestamp. + IsFinalValueSynced = 1 << 1, ///< Indicates if the query has been synced in the host + IsHostSynced = 1 << 2, ///< Indicates if the query has been synced in the host + IsGuestSynced = 1 << 3, ///< Indicates if the query has been synced with the guest. + IsHostManaged = 1 << 4, ///< Indicates if this query points to a host query + IsRewritten = 1 << 5, ///< Indicates if this query was rewritten by another query + IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified. + IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. + IsFence = 1 << 8, ///< Indicates the query is a fence. + IsQueuedForAsyncFlush = 1 << 9, ///< Indicates that the query can be flushed at any moment }; DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) class QueryBase { public: - VAddr guest_address; - QueryFlagBits flags; - u64 value; + VAddr guest_address{}; + QueryFlagBits flags{}; + u64 value{}; protected: // Default constructor - QueryBase() : guest_address(0), flags{}, value{} {} + QueryBase() = default; // Parameterized constructor QueryBase(VAddr address, QueryFlagBits flags_, u64 value_) @@ -51,23 +51,21 @@ public: class HostQueryBase : public QueryBase { public: // Default constructor - HostQueryBase() - : QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0), start_bank_id{}, - size_banks{}, start_slot{}, size_slots{} {} + HostQueryBase() : QueryBase(0, QueryFlagBits::IsHostManaged | QueryFlagBits::IsOrphan, 0) {} // Parameterized constructor - HostQueryBase(bool isLong, VAddr address) + HostQueryBase(bool has_timestamp, VAddr address) : QueryBase(address, QueryFlagBits::IsHostManaged, 0), start_bank_id{}, size_banks{}, start_slot{}, size_slots{} { - if (isLong) { + if (has_timestamp) { flags |= QueryFlagBits::HasTimestamp; } } - u32 start_bank_id; - u32 size_banks; - size_t start_slot; - size_t size_slots; + u32 start_bank_id{}; + u32 size_banks{}; + size_t start_slot{}; + size_t size_slots{}; }; } // namespace VideoCommon
\ No newline at end of file diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h index f1393d5c7..042af053c 100644 --- a/src/video_core/query_cache/query_cache.h +++ b/src/video_core/query_cache/query_cache.h @@ -54,7 +54,7 @@ public: return new_id; } - bool HasPendingSync() override { + bool HasPendingSync() const override { return !pending_sync.empty(); } @@ -71,8 +71,10 @@ public: continue; } query.flags |= QueryFlagBits::IsHostSynced; - sync_values.emplace_back(query.guest_address, query.value, - True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4); + sync_values.emplace_back(SyncValuesStruct{ + .address = query.guest_address, + .value = query.value, + .size = static_cast<u64>(True(query.flags & QueryFlagBits::HasTimestamp) ? 8 : 4)}); } pending_sync.clear(); if (sync_values.size() > 0) { @@ -90,15 +92,20 @@ class StubStreamer : public GuestStreamer<Traits> { public: using RuntimeType = typename Traits::RuntimeType; - StubStreamer(size_t id_, RuntimeType& runtime_) : GuestStreamer<Traits>(id_, runtime_) {} + StubStreamer(size_t id_, RuntimeType& runtime_, u32 stub_value_) + : GuestStreamer<Traits>(id_, runtime_), stub_value{stub_value_} {} ~StubStreamer() override = default; size_t WriteCounter(VAddr address, bool has_timestamp, [[maybe_unused]] u32 value, std::optional<u32> subreport = std::nullopt) override { - size_t new_id = GuestStreamer<Traits>::WriteCounter(address, has_timestamp, 1U, subreport); + size_t new_id = + GuestStreamer<Traits>::WriteCounter(address, has_timestamp, stub_value, subreport); return new_id; } + +private: + u32 stub_value; }; template <typename Traits> @@ -113,7 +120,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl { for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); if (streamers[i]) { - streamer_mask |= 1ULL << i; + streamer_mask |= 1ULL << streamers[i]->GetId(); } } } @@ -152,7 +159,7 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl { QueryCacheBase<Traits>* owner; VideoCore::RasterizerInterface& rasterizer; Core::Memory::Memory& cpu_memory; - Traits::RuntimeType& runtime; + RuntimeType& runtime; Tegra::GPU& gpu; std::array<StreamerInterface*, static_cast<size_t>(QueryType::MaxQueryTypes)> streamers; u64 streamer_mask; @@ -223,15 +230,11 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type const bool is_fence = True(flags & QueryPropertiesFlags::IsAFence); size_t streamer_id = static_cast<size_t>(counter_type); auto* streamer = impl->streamers[streamer_id]; - if (!streamer) [[unlikely]] { - if (has_timestamp) { - u64 timestamp = impl->gpu.GetTicks(); - gpu_memory->Write<u64>(addr + 8, timestamp); - gpu_memory->Write<u64>(addr, 1ULL); - } else { - gpu_memory->Write<u32>(addr, 1U); - } - return; + if (streamer == nullptr) [[unlikely]] { + counter_type = QueryType::Payload; + payload = 1U; + streamer_id = static_cast<size_t>(counter_type); + streamer = impl->streamers[streamer_id]; } auto cpu_addr_opt = gpu_memory->GpuToCpuAddress(addr); if (!cpu_addr_opt) [[unlikely]] { @@ -403,12 +406,6 @@ bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() { impl->runtime.EndHostConditionalRendering(); return false; } - /*if (!Settings::IsGPULevelHigh()) { - impl->runtime.EndHostConditionalRendering(); - return gpu_memory->IsMemoryDirty(regs.render_enable.Address(), 24, - VideoCommon::CacheType::BufferCache | - VideoCommon::CacheType::QueryCache); - }*/ const ComparisonMode mode = static_cast<ComparisonMode>(regs.render_enable.mode); const GPUVAddr address = regs.render_enable.Address(); switch (mode) { @@ -442,6 +439,9 @@ bool QueryCacheBase<Traits>::AccelerateHostConditionalRendering() { // Async downloads template <typename Traits> void QueryCacheBase<Traits>::CommitAsyncFlushes() { + // Make sure to have the results synced in Host. + NotifyWFI(); + u64 mask{}; { std::scoped_lock lk(impl->flush_guard); @@ -458,8 +458,19 @@ void QueryCacheBase<Traits>::CommitAsyncFlushes() { if (mask == 0) { return; } - impl->ForEachStreamerIn(mask, - [](StreamerInterface* streamer) { streamer->PushUnsyncedQueries(); }); + u64 ran_mask = ~mask; + while (mask) { + impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) { + u64 dep_mask = streamer->GetDependentMask(); + if ((dep_mask & ~ran_mask) != 0) { + return; + } + u64 index = streamer->GetId(); + ran_mask |= (1ULL << index); + mask &= ~(1ULL << index); + streamer->PushUnsyncedQueries(); + }); + } } template <typename Traits> @@ -489,13 +500,11 @@ void QueryCacheBase<Traits>::PopAsyncFlushes() { if (mask == 0) { return; } - u64 ran_mask = 0; - u64 next_phase = 0; + u64 ran_mask = ~mask; while (mask) { - impl->ForEachStreamerIn(mask, [&mask, &ran_mask, &next_phase](StreamerInterface* streamer) { + impl->ForEachStreamerIn(mask, [&mask, &ran_mask](StreamerInterface* streamer) { u64 dep_mask = streamer->GetDependenceMask(); if ((dep_mask & ~ran_mask) != 0) { - next_phase |= dep_mask; return; } u64 index = streamer->GetId(); @@ -503,7 +512,6 @@ void QueryCacheBase<Traits>::PopAsyncFlushes() { mask &= ~(1ULL << index); streamer->PopUnsyncedQueries(); }); - ran_mask |= next_phase; } } diff --git a/src/video_core/query_cache/query_cache_base.h b/src/video_core/query_cache/query_cache_base.h index 55f508dd1..07be421c6 100644 --- a/src/video_core/query_cache/query_cache_base.h +++ b/src/video_core/query_cache/query_cache_base.h @@ -47,7 +47,7 @@ public: BitField<0, 27, u32> query_id; u32 raw; - std::pair<size_t, size_t> unpack() { + std::pair<size_t, size_t> unpack() const { return {static_cast<size_t>(stream_id.Value()), static_cast<size_t>(query_id.Value())}; } }; @@ -73,7 +73,7 @@ public: } } - static u64 BuildMask(std::span<QueryType> types) { + static u64 BuildMask(std::span<const QueryType> types) { u64 mask = 0; for (auto query_type : types) { mask |= 1ULL << (static_cast<u64>(query_type)); @@ -160,7 +160,7 @@ protected: } } - using ContentCache = typename std::unordered_map<u64, std::unordered_map<u32, QueryLocation>>; + using ContentCache = std::unordered_map<u64, std::unordered_map<u32, QueryLocation>>; void InvalidateQuery(QueryLocation location); bool IsQueryDirty(QueryLocation location); @@ -175,7 +175,7 @@ protected: friend struct QueryCacheBaseImpl; friend RuntimeType; - std::unique_ptr<struct QueryCacheBaseImpl> impl; + std::unique_ptr<QueryCacheBaseImpl> impl; }; } // namespace VideoCommon
\ No newline at end of file diff --git a/src/video_core/query_cache/query_stream.h b/src/video_core/query_cache/query_stream.h index 0e9275565..e7aac955b 100644 --- a/src/video_core/query_cache/query_stream.h +++ b/src/video_core/query_cache/query_stream.h @@ -16,7 +16,7 @@ namespace VideoCommon { class StreamerInterface { public: - StreamerInterface(size_t id_, u64 dependance_mask_ = 0) : id{id_}, dependance_mask{dependance_mask_} {} + explicit StreamerInterface(size_t id_) : id{id_}, dependence_mask{}, dependent_mask{} {} virtual ~StreamerInterface() = default; virtual QueryBase* GetQuery(size_t id) = 0; @@ -37,7 +37,7 @@ public: /* Do Nothing */ } - virtual bool HasPendingSync() { + virtual bool HasPendingSync() const { return false; } @@ -52,7 +52,7 @@ public: virtual size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, std::optional<u32> subreport = std::nullopt) = 0; - virtual bool HasUnsyncedQueries() { + virtual bool HasUnsyncedQueries() const { return false; } @@ -71,18 +71,28 @@ public: } u64 GetDependenceMask() const { - return dependance_mask; + return dependence_mask; + } + + u64 GetDependentMask() const { + return dependence_mask; } protected: + void MakeDependent(StreamerInterface* depend_on) { + dependence_mask |= 1ULL << depend_on->id; + depend_on->dependent_mask |= 1ULL << id; + } + const size_t id; - const u64 dependance_mask; + u64 dependence_mask; + u64 dependent_mask; }; template <typename QueryType> class SimpleStreamer : public StreamerInterface { public: - SimpleStreamer(size_t id_, u64 dependance_mask_ = 0) : StreamerInterface{id_, dependance_mask_} {} + explicit SimpleStreamer(size_t id_) : StreamerInterface{id_} {} virtual ~SimpleStreamer() = default; protected: diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 2ba7cbb0d..af1469147 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -9,10 +9,10 @@ #include <utility> #include "common/common_types.h" #include "common/polyfill_thread.h" -#include "video_core/query_cache/types.h" #include "video_core/cache_types.h" #include "video_core/engines/fermi_2d.h" #include "video_core/gpu.h" +#include "video_core/query_cache/types.h" #include "video_core/rasterizer_download_area.h" namespace Tegra { @@ -57,7 +57,8 @@ public: virtual void ResetCounter(VideoCommon::QueryType type) = 0; /// Records a GPU query and caches it - virtual void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) = 0; + virtual void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, + VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) = 0; /// Signal an uniform buffer binding virtual void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h index 57a8c4c85..23001eeb8 100644 --- a/src/video_core/renderer_null/null_rasterizer.h +++ b/src/video_core/renderer_null/null_rasterizer.h @@ -43,7 +43,8 @@ public: void Clear(u32 layer_count) override; void DispatchCompute() override; void ResetCounter(VideoCommon::QueryType type) override; - void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; + void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, + VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; void FlushAll() override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index a975bbe75..27e2de1bf 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -405,8 +405,6 @@ void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) { void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) { if (type == VideoCommon::QueryType::ZPassPixelCount64) { - std::optional<u64> timestamp{True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout) - ? std::make_optional<u64>(gpu.GetTicks()) : std:: nullopt }; if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { query_cache.Query(gpu_addr, VideoCore::QueryType::SamplesPassed, {gpu.GetTicks()}); } else { @@ -414,13 +412,23 @@ void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, } return; } - if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { - u64 ticks = gpu.GetTicks(); - gpu_memory->Write<u64>(gpu_addr + 8, ticks); - gpu_memory->Write<u64>(gpu_addr, static_cast<u64>(payload)); - } else { - gpu_memory->Write<u32>(gpu_addr, payload); + if (type != VideoCommon::QueryType::Payload) { + payload = 1u; + } + std::function<void()> func([this, gpu_addr, flags, memory_manager = gpu_memory, payload]() { + if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) { + u64 ticks = gpu.GetTicks(); + memory_manager->Write<u64>(gpu_addr + 8, ticks); + memory_manager->Write<u64>(gpu_addr, static_cast<u64>(payload)); + } else { + memory_manager->Write<u32>(gpu_addr, payload); + } + }); + if (True(flags & VideoCommon::QueryPropertiesFlags::IsAFence)) { + SignalFence(std::move(func)); + return; } + func(); } void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 05e048e15..ceffe1f1e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -87,7 +87,8 @@ public: void Clear(u32 layer_count) override; void DispatchCompute() override; void ResetCounter(VideoCommon::QueryType type) override; - void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; + void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, + VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; void FlushAll() override; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 97cd4521d..039dc95e1 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -303,9 +303,9 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble( return {staging.buffer, staging.offset}; } -ConditionalRenderingResolvePass::ConditionalRenderingResolvePass(const Device& device_, - Scheduler& scheduler_, - DescriptorPool& descriptor_pool_, ComputePassDescriptorQueue& compute_pass_descriptor_queue_) +ConditionalRenderingResolvePass::ConditionalRenderingResolvePass( + const Device& device_, Scheduler& scheduler_, DescriptorPool& descriptor_pool_, + ComputePassDescriptorQueue& compute_pass_descriptor_queue_) : ComputePass(device_, descriptor_pool_, INPUT_OUTPUT_DESCRIPTOR_SET_BINDINGS, INPUT_OUTPUT_DESCRIPTOR_UPDATE_TEMPLATE, INPUT_OUTPUT_BANK_INFO, nullptr, RESOLVE_CONDITIONAL_RENDER_COMP_SPV), diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 14fc5ad71..336573574 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h @@ -7,8 +7,8 @@ #include "video_core/fence_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" -#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" namespace Core { class System; diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index ef891e26b..add0c6fb3 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -11,11 +11,9 @@ #include <utility> #include <vector> -#include <boost/container/small_vector.hpp> -#include <boost/icl/interval_set.hpp> - #include "common/common_types.h" #include "core/memory.h" +#include "video_core/engines/draw_manager.h" #include "video_core/query_cache/query_cache.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_compute_pass.h" @@ -30,6 +28,7 @@ namespace Vulkan { +using Tegra::Engines::Maxwell3D; using VideoCommon::QueryType; namespace { @@ -37,7 +36,7 @@ class SamplesQueryBank : public VideoCommon::BankBase { public: static constexpr size_t BANK_SIZE = 256; static constexpr size_t QUERY_SIZE = 8; - SamplesQueryBank(const Device& device_, size_t index_) + explicit SamplesQueryBank(const Device& device_, size_t index_) : BankBase(BANK_SIZE), device{device_}, index{index_} { const auto& dev = device.GetLogical(); query_pool = dev.CreateQueryPool({ @@ -109,18 +108,19 @@ struct HostSyncValues { static constexpr bool GeneratesBaseBuffer = false; }; -template <typename Traits> class SamplesStreamer : public BaseStreamer { public: - SamplesStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_, - Scheduler& scheduler_, const MemoryAllocator& memory_allocator_) - : BaseStreamer(id), runtime{runtime_}, device{device_}, scheduler{scheduler_}, + explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_, const Device& device_, + Scheduler& scheduler_, const MemoryAllocator& memory_allocator_) + : BaseStreamer(id_), runtime{runtime_}, device{device_}, scheduler{scheduler_}, memory_allocator{memory_allocator_} { BuildResolveBuffer(); current_bank = nullptr; current_query = nullptr; } + ~SamplesStreamer() = default; + void StartCounter() override { if (has_started) { return; @@ -157,7 +157,7 @@ public: PauseCounter(); } - bool HasPendingSync() override { + bool HasPendingSync() const override { return !pending_sync.empty(); } @@ -198,7 +198,7 @@ public: } resolve_slots_remaining = resolve_slots; sync_values_stash.emplace_back(); - sync_values = sync_values = &sync_values_stash.back(); + sync_values = &sync_values_stash.back(); sync_values->reserve(resolve_slots * SamplesQueryBank::BANK_SIZE); } resolve_slots_remaining--; @@ -207,6 +207,7 @@ public: const size_t base_offset = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * (resolve_slots - resolve_slots_remaining - 1); VkQueryPool query_pool = bank->GetInnerPool(); + scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([start, amount, base_offset, query_pool, buffer = *resolve_buffer](vk::CommandBuffer cmdbuf) { size_t final_offset = base_offset + start * SamplesQueryBank::QUERY_SIZE; @@ -284,7 +285,7 @@ public: return index; } - bool HasUnsyncedQueries() override { + bool HasUnsyncedQueries() const override { return !pending_flush_queries.empty(); } @@ -348,8 +349,8 @@ private: for (auto q : queries) { auto* query = GetQuery(q); ApplyBankOp(query, [&indexer](SamplesQueryBank* bank, size_t start, size_t amount) { - auto id = bank->GetIndex(); - auto pair = indexer.try_emplace(id, std::numeric_limits<size_t>::max(), + auto id_ = bank->GetIndex(); + auto pair = indexer.try_emplace(id_, std::numeric_limits<size_t>::max(), std::numeric_limits<size_t>::min()); auto& current_pair = pair.first->second; current_pair.first = std::min(current_pair.first, start); @@ -434,13 +435,14 @@ private: .pNext = nullptr, .flags = 0, .size = SamplesQueryBank::QUERY_SIZE * SamplesQueryBank::BANK_SIZE * resolve_slots, - .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, .queueFamilyIndexCount = 0, .pQueueFamilyIndices = nullptr, }; resolve_buffers.emplace_back( - std::move(memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal))); + memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal)); } static constexpr size_t resolve_slots = 8; @@ -476,7 +478,8 @@ class TFBQueryBank : public VideoCommon::BankBase { public: static constexpr size_t BANK_SIZE = 1024; static constexpr size_t QUERY_SIZE = 4; - TFBQueryBank(Scheduler& scheduler_, const MemoryAllocator& memory_allocator, size_t index_) + explicit TFBQueryBank(Scheduler& scheduler_, const MemoryAllocator& memory_allocator, + size_t index_) : BankBase(BANK_SIZE), scheduler{scheduler_}, index{index_} { const VkBufferCreateInfo buffer_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, @@ -525,22 +528,21 @@ private: vk::Buffer buffer; }; -template <typename Traits> class PrimitivesSucceededStreamer; -template <typename Traits> class TFBCounterStreamer : public BaseStreamer { public: - TFBCounterStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_, - Scheduler& scheduler_, const MemoryAllocator& memory_allocator_, - StagingBufferPool& staging_pool_) - : BaseStreamer(id), runtime{runtime_}, device{device_}, scheduler{scheduler_}, + explicit TFBCounterStreamer(size_t id_, QueryCacheRuntime& runtime_, const Device& device_, + Scheduler& scheduler_, const MemoryAllocator& memory_allocator_, + StagingBufferPool& staging_pool_) + : BaseStreamer(id_), runtime{runtime_}, device{device_}, scheduler{scheduler_}, memory_allocator{memory_allocator_}, staging_pool{staging_pool_} { buffers_count = 0; current_bank = nullptr; counter_buffers.fill(VK_NULL_HANDLE); offsets.fill(0); last_queries.fill(0); + last_queries_stride.fill(1); const VkBufferCreateInfo buffer_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, @@ -564,6 +566,8 @@ public: } } + ~TFBCounterStreamer() = default; + void StartCounter() override { FlushBeginTFB(); has_started = true; @@ -581,15 +585,15 @@ public: if (has_flushed_end_pending) { FlushEndTFB(); } - runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) { - if (regs.transform_feedback_enabled == 0) { + runtime.View3DRegs([this](Maxwell3D& maxwell3d) { + if (maxwell3d.regs.transform_feedback_enabled == 0) { streams_mask = 0; has_started = false; } }); } - bool HasPendingSync() override { + bool HasPendingSync() const override { return !pending_sync.empty(); } @@ -650,14 +654,19 @@ public: return index; } - std::optional<VAddr> GetLastQueryStream(size_t stream) { + std::optional<std::pair<VAddr, size_t>> GetLastQueryStream(size_t stream) { if (last_queries[stream] != 0) { - return {last_queries[stream]}; + std::pair<VAddr, size_t> result(last_queries[stream], last_queries_stride[stream]); + return result; } return std::nullopt; } - bool HasUnsyncedQueries() override { + Maxwell3D::Regs::PrimitiveTopology GetOutputTopology() const { + return out_topology; + } + + bool HasUnsyncedQueries() const override { return !pending_flush_queries.empty(); } @@ -762,15 +771,17 @@ private: void UpdateBuffers() { last_queries.fill(0); - runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) { + last_queries_stride.fill(1); + runtime.View3DRegs([this](Maxwell3D& maxwell3d) { buffers_count = 0; - for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers; - i++) { - const auto& tf = regs.transform_feedback; + out_topology = maxwell3d.draw_manager->GetDrawState().topology; + for (size_t i = 0; i < Maxwell3D::Regs::NumTransformFeedbackBuffers; i++) { + const auto& tf = maxwell3d.regs.transform_feedback; if (tf.buffers[i].enable == 0) { continue; } const size_t stream = tf.controls[i].stream; + last_queries_stride[stream] = tf.controls[i].stride; streams_mask |= 1ULL << stream; buffers_count = std::max<size_t>(buffers_count, stream + 1); } @@ -785,7 +796,8 @@ private: }); current_bank = &bank_pool.GetBank(current_bank_id); } - auto [dont_care, slot] = current_bank->Reserve(); + auto [dont_care, other] = current_bank->Reserve(); + const size_t slot = other; // workaround to compile bug. current_bank->AddReference(); static constexpr VkMemoryBarrier READ_BARRIER{ @@ -818,11 +830,9 @@ private: return {current_bank_id, slot}; } - template <typename Traits> friend class PrimitivesSucceededStreamer; static constexpr size_t NUM_STREAMS = 4; - static constexpr size_t STREAMS_MASK = (1ULL << NUM_STREAMS) - 1ULL; QueryCacheRuntime& runtime; const Device& device; @@ -851,6 +861,8 @@ private: std::array<VkBuffer, NUM_STREAMS> counter_buffers{}; std::array<VkDeviceSize, NUM_STREAMS> offsets{}; std::array<VAddr, NUM_STREAMS> last_queries; + std::array<size_t, NUM_STREAMS> last_queries_stride; + Maxwell3D::Regs::PrimitiveTopology out_topology; u64 streams_mask; }; @@ -858,32 +870,34 @@ class PrimitivesQueryBase : public VideoCommon::QueryBase { public: // Default constructor PrimitivesQueryBase() - : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{}, - dependant_index{}, dependant_manage{} {} + : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0) {} // Parameterized constructor - PrimitivesQueryBase(bool is_long, VAddr address) - : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0), stride{}, - dependant_index{}, dependant_manage{} { - if (is_long) { + PrimitivesQueryBase(bool has_timestamp, VAddr address) + : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0) { + if (has_timestamp) { flags |= VideoCommon::QueryFlagBits::HasTimestamp; } } - u64 stride; - VAddr dependant_address; - size_t dependant_index; - bool dependant_manage; + u64 stride{}; + VAddr dependant_address{}; + Maxwell3D::Regs::PrimitiveTopology topology{Maxwell3D::Regs::PrimitiveTopology::Points}; + size_t dependant_index{}; + bool dependant_manage{}; }; -template <typename Traits> class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<PrimitivesQueryBase> { public: - PrimitivesSucceededStreamer(size_t id, QueryCacheRuntime& runtime_, - TFBCounterStreamer<QueryCacheParams>& tfb_streamer_, Core::Memory::Memory& cpu_memory_) - : VideoCommon::SimpleStreamer<PrimitivesQueryBase>( - id, 1ULL << static_cast<u64>(VideoCommon::QueryType::StreamingByteCount)), - runtime{runtime_}, tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} {} + explicit PrimitivesSucceededStreamer(size_t id_, QueryCacheRuntime& runtime_, + TFBCounterStreamer& tfb_streamer_, + Core::Memory::Memory& cpu_memory_) + : VideoCommon::SimpleStreamer<PrimitivesQueryBase>(id_), runtime{runtime_}, + tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} { + MakeDependent(&tfb_streamer); + } + + ~PrimitivesSucceededStreamer() = default; size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, std::optional<u32> subreport_) override { @@ -901,8 +915,11 @@ public: const size_t subreport = static_cast<size_t>(*subreport_); auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport); bool must_manage_dependance = false; + new_query->topology = tfb_streamer.GetOutputTopology(); if (dependant_address_opt) { - new_query->dependant_address = *dependant_address_opt; + auto [dep_address, stride] = *dependant_address_opt; + new_query->dependant_address = dep_address; + new_query->stride = stride; } else { new_query->dependant_index = tfb_streamer.WriteCounter(address, has_timestamp, value, subreport_); @@ -917,25 +934,28 @@ public: } return index; } + new_query->stride = 1; + runtime.View3DRegs([new_query, subreport](Maxwell3D& maxwell3d) { + for (size_t i = 0; i < Maxwell3D::Regs::NumTransformFeedbackBuffers; i++) { + const auto& tf = maxwell3d.regs.transform_feedback; + if (tf.buffers[i].enable == 0) { + continue; + } + if (tf.controls[i].stream != subreport) { + continue; + } + new_query->stride = tf.controls[i].stride; + break; + } + }); } new_query->dependant_manage = must_manage_dependance; - runtime.View3DRegs([new_query, subreport](Tegra::Engines::Maxwell3D::Regs& regs) { - for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers; - i++) { - const auto& tf = regs.transform_feedback; - if (tf.controls[i].stream != subreport) { - continue; - } - new_query->stride = tf.controls[i].stride; - break; - } - }); pending_flush_queries.push_back(index); return index; } - bool HasUnsyncedQueries() override { + bool HasUnsyncedQueries() const override { return !pending_flush_queries.empty(); } @@ -960,22 +980,49 @@ public: } query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced; + u64 num_vertices = 0; if (query->dependant_manage) { auto* dependant_query = tfb_streamer.GetQuery(query->dependant_index); - query->value = dependant_query->value / query->stride; + num_vertices = dependant_query->value / query->stride; tfb_streamer.Free(query->dependant_index); } else { u8* pointer = cpu_memory.GetPointer(query->dependant_address); u32 result; std::memcpy(&result, pointer, sizeof(u32)); - query->value = static_cast<u64>(result) / query->stride; + num_vertices = static_cast<u64>(result) / query->stride; } + query->value = [&]() -> u64 { + switch (query->topology) { + case Maxwell3D::Regs::PrimitiveTopology::Points: + return num_vertices; + case Maxwell3D::Regs::PrimitiveTopology::Lines: + return num_vertices / 2; + case Maxwell3D::Regs::PrimitiveTopology::LineLoop: + return (num_vertices / 2) + 1; + case Maxwell3D::Regs::PrimitiveTopology::LineStrip: + return num_vertices - 1; + case Maxwell3D::Regs::PrimitiveTopology::Patches: + case Maxwell3D::Regs::PrimitiveTopology::Triangles: + case Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency: + return num_vertices / 3; + case Maxwell3D::Regs::PrimitiveTopology::TriangleFan: + case Maxwell3D::Regs::PrimitiveTopology::TriangleStrip: + case Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency: + return num_vertices - 2; + case Maxwell3D::Regs::PrimitiveTopology::Quads: + return num_vertices / 4; + case Maxwell3D::Regs::PrimitiveTopology::Polygon: + return 1U; + default: + return num_vertices; + } + }(); } } private: QueryCacheRuntime& runtime; - TFBCounterStreamer<QueryCacheParams>& tfb_streamer; + TFBCounterStreamer& tfb_streamer; Core::Memory::Memory& cpu_memory; // syncing queue @@ -1005,7 +1052,10 @@ struct QueryCacheRuntimeImpl { tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device, scheduler, memory_allocator, staging_pool), primitives_succeeded_streamer( - static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, cpu_memory_), + static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, + cpu_memory_), + primitives_needed_minus_suceeded_streamer( + static_cast<size_t>(QueryType::StreamingPrimitivesNeededMinusSucceeded), runtime, 0u), hcr_setup{}, hcr_is_set{}, is_hcr_running{} { hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; @@ -1040,9 +1090,10 @@ struct QueryCacheRuntimeImpl { // Streamers VideoCommon::GuestStreamer<QueryCacheParams> guest_streamer; - SamplesStreamer<QueryCacheParams> sample_streamer; - TFBCounterStreamer<QueryCacheParams> tfb_streamer; - PrimitivesSucceededStreamer<QueryCacheParams> primitives_succeeded_streamer; + SamplesStreamer sample_streamer; + TFBCounterStreamer tfb_streamer; + PrimitivesSucceededStreamer primitives_succeeded_streamer; + VideoCommon::StubStreamer<QueryCacheParams> primitives_needed_minus_suceeded_streamer; std::vector<std::pair<VAddr, VAddr>> little_cache; std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to; @@ -1059,7 +1110,7 @@ struct QueryCacheRuntimeImpl { bool is_hcr_running; // maxwell3d - Tegra::Engines::Maxwell3D* maxwell3d; + Maxwell3D* maxwell3d; }; QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, @@ -1074,13 +1125,13 @@ QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer, staging_pool_, compute_pass_descriptor_queue, descriptor_pool); } -void QueryCacheRuntime::Bind3DEngine(Tegra::Engines::Maxwell3D* maxwell3d) { +void QueryCacheRuntime::Bind3DEngine(Maxwell3D* maxwell3d) { impl->maxwell3d = maxwell3d; } template <typename Func> void QueryCacheRuntime::View3DRegs(Func&& func) { - func(impl->maxwell3d->regs); + func(*impl->maxwell3d); } void QueryCacheRuntime::EndHostConditionalRendering() { @@ -1240,8 +1291,12 @@ VideoCommon::StreamerInterface* QueryCacheRuntime::GetStreamerInterface(QueryTyp return &impl->sample_streamer; case QueryType::StreamingByteCount: return &impl->tfb_streamer; + case QueryType::StreamingPrimitivesNeeded: + case QueryType::VtgPrimitivesOut: case QueryType::StreamingPrimitivesSucceeded: return &impl->primitives_succeeded_streamer; + case QueryType::StreamingPrimitivesNeededMinusSucceeded: + return &impl->primitives_needed_minus_suceeded_streamer; default: return nullptr; } diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index 9ad2929d7..e9a1ea169 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -49,7 +49,8 @@ public: bool HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1, bool qc_dirty); bool HostConditionalRenderingCompareValues(VideoCommon::LookupData object_1, - VideoCommon::LookupData object_2, bool qc_dirty, bool equal_check); + VideoCommon::LookupData object_2, bool qc_dirty, + bool equal_check); VideoCommon::StreamerInterface* GetStreamerInterface(VideoCommon::QueryType query_type); @@ -66,7 +67,7 @@ private: }; struct QueryCacheParams { - using RuntimeType = Vulkan::QueryCacheRuntime; + using RuntimeType = typename Vulkan::QueryCacheRuntime; }; using QueryCache = VideoCommon::QueryCacheBase<QueryCacheParams>; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e8862ba04..c7ce7c312 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -194,15 +194,6 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { query_cache.NotifySegment(true); -#if ANDROID - if (Settings::IsGPULevelHigh()) { - // This is problematic on Android, disable on GPU Normal. - // query_cache.UpdateCounters(); - } -#else - // query_cache.UpdateCounters(); -#endif - GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; if (!pipeline) { return; @@ -294,15 +285,6 @@ void RasterizerVulkan::DrawTexture() { query_cache.NotifySegment(true); -#if ANDROID - if (Settings::IsGPULevelHigh()) { - // This is problematic on Android, disable on GPU Normal. - // query_cache.UpdateCounters(); - } -#else - // query_cache.UpdateCounters(); -#endif - texture_cache.SynchronizeGraphicsDescriptors(); texture_cache.UpdateRenderTargets(false); @@ -332,15 +314,6 @@ void RasterizerVulkan::Clear(u32 layer_count) { FlushWork(); gpu_memory->FlushCaching(); -#if ANDROID - if (Settings::IsGPULevelHigh()) { - // This is problematic on Android, disable on GPU Normal. - // query_cache.UpdateCounters(); - } -#else - // query_cache.UpdateCounters(); -#endif - query_cache.NotifySegment(true); query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, maxwell3d->regs.zpass_pixel_count_enable); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index ffd44c68d..ad069556c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -85,7 +85,8 @@ public: void Clear(u32 layer_count) override; void DispatchCompute() override; void ResetCounter(VideoCommon::QueryType type) override; - void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; + void Query(GPUVAddr gpu_addr, VideoCommon::QueryType type, + VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) override; void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override; void DisableGraphicsUniformBuffer(size_t stage, u32 index) override; void FlushAll() override; diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index c87e5fb07..da03803aa 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -15,9 +15,13 @@ #include "common/common_types.h" #include "common/polyfill_thread.h" #include "video_core/renderer_vulkan/vk_master_semaphore.h" -#include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" +namespace VideoCommon { +template <typename Trait> +class QueryCacheBase; +} + namespace Vulkan { class CommandPool; @@ -26,6 +30,8 @@ class Framebuffer; class GraphicsPipeline; class StateTracker; +struct QueryCacheParams; + /// The scheduler abstracts command buffer and fence management with an interface that's able to do /// OpenGL-like operations on Vulkan command buffers. class Scheduler { @@ -63,7 +69,7 @@ public: void InvalidateState(); /// Assigns the query cache. - void SetQueryCache(QueryCache& query_cache_) { + void SetQueryCache(VideoCommon::QueryCacheBase<QueryCacheParams>& query_cache_) { query_cache = &query_cache_; } @@ -219,7 +225,7 @@ private: std::unique_ptr<MasterSemaphore> master_semaphore; std::unique_ptr<CommandPool> command_pool; - QueryCache* query_cache = nullptr; + VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr; vk::CommandBuffer current_cmdbuf; |