summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
author: Fernando Sahmkow <fsahmkow27@gmail.com> 2023-08-04 13:38:49 +0200
committer: Fernando Sahmkow <fsahmkow27@gmail.com> 2023-09-23 23:05:29 +0200
commit: aa6587d854e4953876b02ca71278a665bcae8179 (patch)
tree: f2dafb0cda400fe1321f670c9eacc26051eca9aa /src/video_core
parent: Macro HLE: Add DrawIndirectByteCount (diff)
downloadyuzu-aa6587d854e4953876b02ca71278a665bcae8179.tar
yuzu-aa6587d854e4953876b02ca71278a665bcae8179.tar.gz
yuzu-aa6587d854e4953876b02ca71278a665bcae8179.tar.bz2
yuzu-aa6587d854e4953876b02ca71278a665bcae8179.tar.lz
yuzu-aa6587d854e4953876b02ca71278a665bcae8179.tar.xz
yuzu-aa6587d854e4953876b02ca71278a665bcae8179.tar.zst
yuzu-aa6587d854e4953876b02ca71278a665bcae8179.zip
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/query_cache/query_base.h | 1
-rw-r--r-- src/video_core/query_cache/query_cache.h | 18
-rw-r--r-- src/video_core/query_cache/query_stream.h | 6
-rw-r--r-- src/video_core/renderer_vulkan/vk_query_cache.cpp | 160
4 files changed, 180 insertions, 5 deletions
diff --git a/src/video_core/query_cache/query_base.h b/src/video_core/query_cache/query_base.h
index 485ed669c..0ae23af9f 100644
--- a/src/video_core/query_cache/query_base.h
+++ b/src/video_core/query_cache/query_base.h
@@ -18,6 +18,7 @@ enum class QueryFlagBits : u32 {
IsInvalidated = 1 << 6, ///< Indicates the value of th query has been nullified.
IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query.
IsFence = 1 << 8, ///< Indicates the query is a fence.
+ IsQueuedForAsyncFlush = 1 <<9,///< Indicates that the query can be flushed at any moment
};
DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits)
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h
index f6af48d14..f1393d5c7 100644
--- a/src/video_core/query_cache/query_cache.h
+++ b/src/video_core/query_cache/query_cache.h
@@ -489,8 +489,22 @@ void QueryCacheBase<Traits>::PopAsyncFlushes() {
if (mask == 0) {
return;
}
- impl->ForEachStreamerIn(mask,
- [](StreamerInterface* streamer) { streamer->PopUnsyncedQueries(); });
+ u64 ran_mask = 0;
+ u64 next_phase = 0;
+ while (mask) {
+ impl->ForEachStreamerIn(mask, [&mask, &ran_mask, &next_phase](StreamerInterface* streamer) {
+ u64 dep_mask = streamer->GetDependenceMask();
+ if ((dep_mask & ~ran_mask) != 0) {
+ next_phase |= dep_mask;
+ return;
+ }
+ u64 index = streamer->GetId();
+ ran_mask |= (1ULL << index);
+ mask &= ~(1ULL << index);
+ streamer->PopUnsyncedQueries();
+ });
+ ran_mask |= next_phase;
+ }
}
// Invalidation
diff --git a/src/video_core/query_cache/query_stream.h b/src/video_core/query_cache/query_stream.h
index dd5f95b3c..0e9275565 100644
--- a/src/video_core/query_cache/query_stream.h
+++ b/src/video_core/query_cache/query_stream.h
@@ -70,6 +70,10 @@ public:
return id;
}
+ u64 GetDependenceMask() const { // Accessor for this streamer's const dependance_mask member.
+ return dependance_mask; // QueryCacheBase::PopAsyncFlushes tests these bits against (1 << GetId()) of streamers it has already marked as run.
+ }
+
protected:
const size_t id;
const u64 dependance_mask;
@@ -78,7 +82,7 @@ protected:
template <typename QueryType>
class SimpleStreamer : public StreamerInterface {
public:
- SimpleStreamer(size_t id_) : StreamerInterface{id_} {}
+    // Hands the streamer id and its dependence mask (0 when omitted) to StreamerInterface.
+    SimpleStreamer(size_t id_, u64 dependance_mask_ = 0)
+        : StreamerInterface{id_, dependance_mask_} {}
virtual ~SimpleStreamer() = default;
protected:
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 42f571007..ef891e26b 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -526,6 +526,9 @@ private:
};
template <typename Traits>
+class PrimitivesSucceededStreamer;
+
+template <typename Traits>
class TFBCounterStreamer : public BaseStreamer {
public:
TFBCounterStreamer(size_t id, QueryCacheRuntime& runtime_, const Device& device_,
@@ -537,6 +540,7 @@ public:
current_bank = nullptr;
counter_buffers.fill(VK_NULL_HANDLE);
offsets.fill(0);
+ last_queries.fill(0);
const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
@@ -630,7 +634,7 @@ public:
return index;
}
const size_t subreport = static_cast<size_t>(*subreport_);
- UpdateBuffers();
+ last_queries[subreport] = address;
if ((streams_mask & (1ULL << subreport)) == 0) {
new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
return index;
@@ -646,6 +650,13 @@ public:
return index;
}
+    /// Looks up the address recorded in last_queries for the given stream.
+    /// @param stream Index into last_queries (not range-checked here).
+    /// @return The recorded address, or std::nullopt while the slot still holds 0.
+    std::optional<VAddr> GetLastQueryStream(size_t stream) {
+        const VAddr last_address = last_queries[stream];
+        if (last_address == 0) {
+            // A zero entry means nothing has been recorded for this stream.
+            return std::nullopt;
+        }
+        return {last_address};
+    }
+
bool HasUnsyncedQueries() override {
return !pending_flush_queries.empty();
}
@@ -657,6 +668,7 @@ public:
size_t offset_base = staging_ref.offset;
for (auto q : pending_flush_queries) {
auto* query = GetQuery(q);
+ query->flags |= VideoCommon::QueryFlagBits::IsQueuedForAsyncFlush;
auto& bank = bank_pool.GetBank(query->start_bank_id);
bank.Sync(staging_ref, offset_base, query->start_slot, 1);
offset_base += TFBQueryBank::QUERY_SIZE;
@@ -741,13 +753,15 @@ private:
cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr);
});
} else {
- scheduler.Record([this, total = static_cast<u32>(buffers_count)](vk::CommandBuffer cmdbuf) {
+ scheduler.Record([this,
+ total = static_cast<u32>(buffers_count)](vk::CommandBuffer cmdbuf) {
cmdbuf.EndTransformFeedbackEXT(0, total, counter_buffers.data(), offsets.data());
});
}
}
void UpdateBuffers() {
+ last_queries.fill(0);
runtime.View3DRegs([this](Tegra::Engines::Maxwell3D::Regs& regs) {
buffers_count = 0;
for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers;
@@ -804,6 +818,9 @@ private:
return {current_bank_id, slot};
}
+ template <typename Traits>
+ friend class PrimitivesSucceededStreamer;
+
static constexpr size_t NUM_STREAMS = 4;
static constexpr size_t STREAMS_MASK = (1ULL << NUM_STREAMS) - 1ULL;
@@ -833,9 +850,143 @@ private:
size_t buffers_count{};
std::array<VkBuffer, NUM_STREAMS> counter_buffers{};
std::array<VkDeviceSize, NUM_STREAMS> offsets{};
+ std::array<VAddr, NUM_STREAMS> last_queries;
u64 streams_mask;
};
+/// Query record used by PrimitivesSucceededStreamer: a host-managed QueryBase extended with
+/// the stride used to scale the dependant value and a reference to that dependant query,
+/// either by address or by index.
+class PrimitivesQueryBase : public VideoCommon::QueryBase {
+public:
+    // Default constructor
+    PrimitivesQueryBase()
+        : VideoCommon::QueryBase(0, VideoCommon::QueryFlagBits::IsHostManaged, 0) {}
+
+    // Parameterized constructor
+    // is_long additionally raises the HasTimestamp flag on the query.
+    PrimitivesQueryBase(bool is_long, VAddr address)
+        : VideoCommon::QueryBase(address, VideoCommon::QueryFlagBits::IsHostManaged, 0) {
+        if (is_long) {
+            flags |= VideoCommon::QueryFlagBits::HasTimestamp;
+        }
+    }
+
+    // Every member carries a default initializer so that no constructor can leave one
+    // indeterminate (dependant_address previously was).
+    u64 stride{};              // Divisor applied to the dependant value when the query is popped.
+    VAddr dependant_address{}; // Address the dependant value is read from when not managed.
+    size_t dependant_index{};  // Index of the dependant query inside the TFB streamer.
+    bool dependant_manage{};   // True when dependant_index (not dependant_address) is in use.
+};
+
+/// Streamer that derives its counter from a TFBCounterStreamer query on the same stream:
+/// the dependant value is divided by the transform feedback stride read from the 3D
+/// registers. It is constructed with a dependence mask holding the StreamingByteCount bit.
+template <typename Traits>
+class PrimitivesSucceededStreamer : public VideoCommon::SimpleStreamer<PrimitivesQueryBase> {
+public:
+    PrimitivesSucceededStreamer(size_t id, QueryCacheRuntime& runtime_,
+                                TFBCounterStreamer<QueryCacheParams>& tfb_streamer_,
+                                Core::Memory::Memory& cpu_memory_)
+        : VideoCommon::SimpleStreamer<PrimitivesQueryBase>(
+              id, 1ULL << static_cast<u64>(VideoCommon::QueryType::StreamingByteCount)),
+          runtime{runtime_}, tfb_streamer{tfb_streamer_}, cpu_memory{cpu_memory_} {}
+
+    /// Builds a new query for `address` and links it to a dependant TFB query.
+    /// @param address       Guest address stored on the new query.
+    /// @param has_timestamp Raises HasTimestamp on the new query when true.
+    /// @param value         Forwarded to the TFB streamer if a dependant query must be created.
+    /// @param subreport_    Stream index; when absent the query is immediately marked synced.
+    /// @return Index of the newly built query.
+    size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
+                        std::optional<u32> subreport_) override {
+        auto index = BuildQuery();
+        auto* new_query = GetQuery(index);
+        new_query->guest_address = address;
+        new_query->value = 0;
+        if (has_timestamp) {
+            new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp;
+        }
+        if (!subreport_) {
+            new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
+            return index;
+        }
+        const size_t subreport = static_cast<size_t>(*subreport_);
+        auto dependant_address_opt = tfb_streamer.GetLastQueryStream(subreport);
+        bool must_manage_dependance = false;
+        if (dependant_address_opt) {
+            // The TFB streamer already recorded a query for this stream: refer to it by address.
+            new_query->dependant_address = *dependant_address_opt;
+        } else {
+            // Nothing recorded: create a private TFB query that this streamer owns and frees.
+            new_query->dependant_index =
+                tfb_streamer.WriteCounter(address, has_timestamp, value, subreport_);
+            auto* dependant_query = tfb_streamer.GetQuery(new_query->dependant_index);
+            dependant_query->flags |= VideoCommon::QueryFlagBits::IsInvalidated;
+            must_manage_dependance = true;
+            if (True(dependant_query->flags & VideoCommon::QueryFlagBits::IsFinalValueSynced)) {
+                // The dependant query is already final, so there is nothing to wait for.
+                new_query->value = 0;
+                new_query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
+                tfb_streamer.Free(new_query->dependant_index);
+                return index;
+            }
+        }
+
+        new_query->dependant_manage = must_manage_dependance;
+        runtime.View3DRegs([new_query, subreport](Tegra::Engines::Maxwell3D::Regs& regs) {
+            // Take the stride of the first transform feedback control bound to this stream.
+            for (size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers;
+                 i++) {
+                const auto& tf = regs.transform_feedback;
+                if (tf.controls[i].stream != subreport) {
+                    continue;
+                }
+                new_query->stride = tf.controls[i].stride;
+                break;
+            }
+        });
+        pending_flush_queries.push_back(index);
+        return index;
+    }
+
+    bool HasUnsyncedQueries() override {
+        return !pending_flush_queries.empty();
+    }
+
+    /// Moves the currently pending queries into a new flush set at the back of the queue.
+    void PushUnsyncedQueries() override {
+        std::scoped_lock lk(flush_guard);
+        pending_flush_sets.emplace_back(std::move(pending_flush_queries));
+        // Bring the moved-from vector back to a defined empty state.
+        pending_flush_queries.clear();
+    }
+
+    /// Takes the oldest flush set and resolves the final value of every query in it.
+    void PopUnsyncedQueries() override {
+        std::vector<size_t> flushed_queries;
+        {
+            std::scoped_lock lk(flush_guard);
+            if (pending_flush_sets.empty()) {
+                // front()/pop_front() on an empty deque is undefined; nothing to pop.
+                return;
+            }
+            flushed_queries = std::move(pending_flush_sets.front());
+            pending_flush_sets.pop_front();
+        }
+
+        for (const size_t q : flushed_queries) {
+            auto* query = GetQuery(q);
+            if (True(query->flags & VideoCommon::QueryFlagBits::IsFinalValueSynced)) {
+                continue;
+            }
+
+            query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
+            if (query->dependant_manage) {
+                auto* dependant_query = tfb_streamer.GetQuery(query->dependant_index);
+                query->value = DivideByStride(dependant_query->value, query->stride);
+                tfb_streamer.Free(query->dependant_index);
+            } else {
+                const u8* pointer = cpu_memory.GetPointer(query->dependant_address);
+                if (pointer == nullptr) {
+                    // NOTE(review): presumably GetPointer yields null for an unmapped address -
+                    // confirm. The value keeps what WriteCounter stored (0) in that case.
+                    continue;
+                }
+                u32 result;
+                std::memcpy(&result, pointer, sizeof(u32));
+                query->value = DivideByStride(static_cast<u64>(result), query->stride);
+            }
+        }
+    }
+
+private:
+    /// Divides `amount` by `stride`. A zero stride (no control matched the stream, or the
+    /// register holds 0) would make the division undefined, so it is treated as a stride of 1.
+    static u64 DivideByStride(u64 amount, u64 stride) {
+        return stride != 0 ? amount / stride : amount;
+    }
+
+    QueryCacheRuntime& runtime;
+    TFBCounterStreamer<QueryCacheParams>& tfb_streamer;
+    Core::Memory::Memory& cpu_memory;
+
+    // syncing queue (not referenced by any method of this class)
+    std::vector<size_t> pending_sync;
+
+    // flush levels
+    std::vector<size_t> pending_flush_queries;
+    std::deque<std::vector<size_t>> pending_flush_sets;
+    std::mutex flush_guard;
+};
+
} // namespace
struct QueryCacheRuntimeImpl {
@@ -853,6 +1004,8 @@ struct QueryCacheRuntimeImpl {
scheduler, memory_allocator),
tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device,
scheduler, memory_allocator, staging_pool),
+ primitives_succeeded_streamer(
+ static_cast<size_t>(QueryType::StreamingPrimitivesSucceeded), runtime, tfb_streamer, cpu_memory_),
hcr_setup{}, hcr_is_set{}, is_hcr_running{} {
hcr_setup.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT;
@@ -889,6 +1042,7 @@ struct QueryCacheRuntimeImpl {
VideoCommon::GuestStreamer<QueryCacheParams> guest_streamer;
SamplesStreamer<QueryCacheParams> sample_streamer;
TFBCounterStreamer<QueryCacheParams> tfb_streamer;
+ PrimitivesSucceededStreamer<QueryCacheParams> primitives_succeeded_streamer;
std::vector<std::pair<VAddr, VAddr>> little_cache;
std::vector<std::pair<VkBuffer, VkDeviceSize>> buffers_to_upload_to;
@@ -1086,6 +1240,8 @@ VideoCommon::StreamerInterface* QueryCacheRuntime::GetStreamerInterface(QueryTyp
return &impl->sample_streamer;
case QueryType::StreamingByteCount:
return &impl->tfb_streamer;
+ case QueryType::StreamingPrimitivesSucceeded:
+ return &impl->primitives_succeeded_streamer;
default:
return nullptr;
}