summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Fernando Sahmkow <fsahmkow27@gmail.com> 2020-02-17 23:10:23 +0100
committer Fernando Sahmkow <fsahmkow27@gmail.com> 2020-04-22 17:36:10 +0200
commit 487379c593bcaf3787ede187c5d44f7923b54dc9 (patch)
tree b66c5c541a55be6d4b76b78c07be11731a7cb400
parent TextureCache: Flush linear textures after finishing rendering. (diff)
download yuzu-487379c593bcaf3787ede187c5d44f7923b54dc9.tar
yuzu-487379c593bcaf3787ede187c5d44f7923b54dc9.tar.gz
yuzu-487379c593bcaf3787ede187c5d44f7923b54dc9.tar.bz2
yuzu-487379c593bcaf3787ede187c5d44f7923b54dc9.tar.lz
yuzu-487379c593bcaf3787ede187c5d44f7923b54dc9.tar.xz
yuzu-487379c593bcaf3787ede187c5d44f7923b54dc9.tar.zst
yuzu-487379c593bcaf3787ede187c5d44f7923b54dc9.zip
-rw-r--r-- src/video_core/engines/maxwell_3d.cpp 16
-rw-r--r-- src/video_core/engines/maxwell_3d.h 4
-rw-r--r-- src/video_core/gpu.cpp 2
-rw-r--r-- src/video_core/gpu.h 2
-rw-r--r-- src/video_core/gpu_asynch.cpp 4
-rw-r--r-- src/video_core/gpu_asynch.h 2
-rw-r--r-- src/video_core/gpu_thread.cpp 6
-rw-r--r-- src/video_core/gpu_thread.h 7
-rw-r--r-- src/video_core/rasterizer_interface.h 8
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 28
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 2
-rw-r--r-- src/video_core/texture_cache/texture_cache.h 32
12 files changed, 94 insertions, 19 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 2605c3b42..c297bc31b 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -397,14 +397,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
}
}
-void Maxwell3D::ReleaseFences() {
- for (const auto pair : delay_fences) {
- const auto [addr, payload] = pair;
- memory_manager.Write<u32>(addr, static_cast<u32>(payload));
- }
- delay_fences.clear();
-}
-
void Maxwell3D::ProcessQueryGet() {
// TODO(Subv): Support the other query units.
ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@@ -412,10 +404,12 @@ void Maxwell3D::ProcessQueryGet() {
switch (regs.query.query_get.operation) {
case Regs::QueryOperation::Release: {
- rasterizer.FlushCommands();
- rasterizer.SyncGuestHost();
const u64 result = regs.query.query_sequence;
- delay_fences.emplace_back(regs.query.QueryAddress(), result);
+ if (regs.query.query_get.fence == 1) {
+ rasterizer.SignalFence(regs.query.QueryAddress(), static_cast<u32>(result));
+ } else {
+ StampQueryResult(result, regs.query.query_get.short_query == 0);
+ }
break;
}
case Regs::QueryOperation::Acquire:
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 0a93827ec..59d5752d2 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1427,8 +1427,6 @@ public:
Tables tables{};
} dirty;
- void ReleaseFences();
-
private:
void InitializeRegisterDefaults();
@@ -1469,8 +1467,6 @@ private:
std::array<u8, Regs::NUM_REGS> dirty_pointers{};
- std::vector<std::pair<GPUVAddr, u64>> delay_fences;
-
/// Retrieves information about a specific TIC entry from the TIC buffer.
Texture::TICEntry GetTICEntry(u32 tic_index) const;
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 71ddfbd26..d05b6a9d2 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -147,7 +147,7 @@ void GPU::SyncGuestHost() {
}
void GPU::OnCommandListEnd() {
- maxwell_3d->ReleaseFences();
+ renderer.Rasterizer().ReleaseFences();
}
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
// their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4.
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b88445634..fa9991c87 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -157,7 +157,7 @@ public:
void FlushCommands();
void SyncGuestHost();
- void OnCommandListEnd();
+ virtual void OnCommandListEnd();
/// Returns a reference to the Maxwell3D GPU engine.
Engines::Maxwell3D& Maxwell3D();
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 20e73a37e..53305ab43 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -52,4 +52,8 @@ void GPUAsynch::WaitIdle() const {
gpu_thread.WaitIdle();
}
+void GPUAsynch::OnCommandListEnd() { // Hands the end-of-command-list notification to gpu_thread
+ gpu_thread.OnCommandListEnd(); // only forwards; nothing is released directly in this function
+}
+
} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 03fd0eef0..517658612 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -32,6 +32,8 @@ public:
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void WaitIdle() const override;
+ void OnCommandListEnd() override;
+
protected:
void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 1994d3bb4..251a9d911 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -37,6 +37,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic
dma_pusher.DispatchCalls();
} else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
+ } else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) {
+ renderer.Rasterizer().ReleaseFences();
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
renderer.Rasterizer().FlushRegion(data->addr, data->size);
} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
@@ -95,6 +97,10 @@ void ThreadManager::WaitIdle() const {
}
}
+void ThreadManager::OnCommandListEnd() { // Enqueues an OnCommandListEndCommand via PushCommand
+ PushCommand(OnCommandListEndCommand()); // RunThread answers this command with Rasterizer().ReleaseFences(); the returned fence id is unused
+}
+
u64 ThreadManager::PushCommand(CommandData&& command_data) {
const u64 fence{++state.last_fence};
state.queue.Push(CommandDataContainer(std::move(command_data), fence));
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index cd74ad330..9d0877921 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -70,9 +70,12 @@ struct FlushAndInvalidateRegionCommand final {
u64 size;
};
+/// Command to signal to the GPU thread that processing has ended
+struct OnCommandListEndCommand final {};
+
using CommandData =
std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
- InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+ InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand>;
struct CommandDataContainer {
CommandDataContainer() = default;
@@ -122,6 +125,8 @@ public:
// Wait until the gpu thread is idle.
void WaitIdle() const;
+ void OnCommandListEnd();
+
private:
/// Pushes a command to be executed by the GPU thread
u64 PushCommand(CommandData&& command_data);
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 0d05a3fc7..72f65b166 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -49,6 +49,14 @@ public:
/// Records a GPU query and caches it
virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
+ virtual void SignalFence(GPUVAddr addr, u32 value) { // Default is a no-op and ignores both arguments; RasterizerOpenGL overrides it
+
+ }
+
+ virtual void ReleaseFences() { // Default is a no-op; RasterizerOpenGL overrides it to drain its pending fences
+
+ }
+
/// Notify rasterizer that all caches should be flushed to Switch memory
virtual void FlushAll() = 0;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 988eaeaa5..93bb33e8c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -676,6 +676,34 @@ void RasterizerOpenGL::SyncGuestHost() {
buffer_cache.SyncGuestHost();
}
+void RasterizerOpenGL::SignalFence(GPUVAddr addr, u32 value) { // Queues (addr, value); the memory write for it is deferred
+ if (!fences.empty()) { // retires at most one older fence per call (plain if, whereas ReleaseFences loops)
+ const std::pair<GPUVAddr, u32>& current_fence = fences.front();
+ const auto [address, payload] = current_fence; // copies of the oldest pending fence's address and payload
+ texture_cache.PopAsyncFlushes(); // runs before the payload write below
+ auto& gpu{system.GPU()};
+ auto& memory_manager{gpu.MemoryManager()};
+ memory_manager.Write<u32>(address, payload); // stores the 32-bit payload at the fence's GPU address
+ fences.pop_front();
+ }
+ fences.emplace_back(addr, value); // stays queued until a later SignalFence or ReleaseFences writes it
+ texture_cache.CommitAsyncFlushes(); // adds one committed flush entry per queued fence
+ FlushCommands();
+ SyncGuestHost();
+}
+
+void RasterizerOpenGL::ReleaseFences() { // Drains every pending fence, oldest first
+ while (!fences.empty()) { // loop body matches the single-fence retirement in SignalFence
+ const std::pair<GPUVAddr, u32>& current_fence = fences.front();
+ const auto [address, payload] = current_fence; // copied out before pop_front() removes the element
+ texture_cache.PopAsyncFlushes(); // one committed flush entry is consumed per fence, before its write
+ auto& gpu{system.GPU()};
+ auto& memory_manager{gpu.MemoryManager()};
+ memory_manager.Write<u32>(address, payload); // stores the 32-bit payload at the fence's GPU address
+ fences.pop_front();
+ }
+}
+
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
if (Settings::IsGPULevelExtreme()) {
FlushRegion(addr, size);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index a870024c6..486a154ad 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -69,6 +69,8 @@ public:
void InvalidateRegion(VAddr addr, u64 size) override;
void OnCPUWrite(VAddr addr, u64 size) override;
void SyncGuestHost() override;
+ void SignalFence(GPUVAddr addr, u32 value) override;
+ void ReleaseFences() override;
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
void FlushCommands() override;
void TickFrame() override;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index d8c8390bb..6629c59ed 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -238,7 +238,7 @@ public:
surface->MarkAsRenderTarget(false, NO_RT);
const auto& cr_params = surface->GetSurfaceParams();
if (!cr_params.is_tiled) {
- FlushSurface(surface);
+ AsyncFlushSurface(surface);
}
}
render_targets[index].target = surface_view.first;
@@ -317,6 +317,26 @@ public:
return ++ticks;
}
+ void CommitAsyncFlushes() { // Appends the pending flush list to the committed queue and clears the pending slot
+ commited_flushes.push_back(uncommited_flushes); // pushed even when null, so every call adds exactly one entry
+ uncommited_flushes.reset(); // the next AsyncFlushSurface call allocates a fresh list
+ }
+
+ void PopAsyncFlushes() { // Consumes the oldest committed entry, flushing its surfaces if it holds any
+ if (commited_flushes.empty()) { // nothing committed: no-op
+ return;
+ }
+ auto& flush_list = commited_flushes.front();
+ if (!flush_list) { // entry was committed with no surfaces queued (null list): just drop it
+ commited_flushes.pop_front();
+ return;
+ }
+ for (TSurface& surface : *flush_list) {
+ FlushSurface(surface); // the deferred flush happens here
+ }
+ commited_flushes.pop_front(); // flush_list refers to this element and must not be used past this line
+ }
+
protected:
explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
bool is_astc_supported)
@@ -1152,6 +1172,13 @@ private:
TView view;
};
+ void AsyncFlushSurface(TSurface& surface) { // Records the surface for a later flush instead of flushing immediately
+ if (!uncommited_flushes) { // the list is created lazily, on first use after a commit
+ uncommited_flushes = std::make_shared<std::list<TSurface>>();
+ }
+ uncommited_flushes->push_back(surface); // no duplicate check: the same surface can be queued more than once
+ }
+
VideoCore::RasterizerInterface& rasterizer;
FormatLookupTable format_lookup_table;
@@ -1198,6 +1225,9 @@ private:
std::list<TSurface> marked_for_unregister;
+ std::shared_ptr<std::list<TSurface>> uncommited_flushes{}; // surfaces queued by AsyncFlushSurface since the last commit; null when none
+ std::list<std::shared_ptr<std::list<TSurface>>> commited_flushes; // one entry per CommitAsyncFlushes call, consumed front-first by PopAsyncFlushes
+
StagingCache staging_cache;
std::recursive_mutex mutex;
};