From 4483089d704cd4913a748d2198359cc0cf7b32c5 Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 7 Jan 2019 23:32:02 -0500 Subject: gpu: Refactor to take RendererBase instead of RasterizerInterface. --- src/video_core/gpu.cpp | 5 +++-- src/video_core/gpu.h | 34 +++++++++++++++++++--------------- 2 files changed, 22 insertions(+), 17 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index ac30d1a89..08abf8ac9 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -12,7 +12,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_dma.h" #include "video_core/gpu.h" -#include "video_core/rasterizer_interface.h" +#include "video_core/renderer_base.h" namespace Tegra { @@ -28,7 +28,8 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { UNREACHABLE(); } -GPU::GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer) { +GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { + auto& rasterizer{renderer.Rasterizer()}; memory_manager = std::make_unique(); dma_pusher = std::make_unique(*this); maxwell_3d = std::make_unique(system, rasterizer, *memory_manager); diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 6313702f2..ac7aec6a4 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -16,8 +16,8 @@ class System; } namespace VideoCore { -class RasterizerInterface; -} +class RendererBase; +} // namespace VideoCore namespace Tegra { @@ -121,7 +121,8 @@ enum class EngineID { class GPU final { public: - explicit GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer); + explicit GPU(Core::System& system, VideoCore::RendererBase& renderer); + ~GPU(); struct MethodCall { @@ -200,9 +201,24 @@ public: }; } regs{}; +private: + void ProcessBindMethod(const MethodCall& method_call); + void ProcessSemaphoreTriggerMethod(); + void ProcessSemaphoreRelease(); + void ProcessSemaphoreAcquire(); + + // Calls a GPU puller method. + void CallPullerMethod(const MethodCall& method_call); + // Calls a GPU engine method. + void CallEngineMethod(const MethodCall& method_call); + // Determines where the method should be executed. + bool ExecuteMethodOnEngine(const MethodCall& method_call); + private: std::unique_ptr dma_pusher; std::unique_ptr memory_manager; + + VideoCore::RendererBase& renderer; /// Mapping of command subchannels to their bound engine ids. std::array bound_engines = {}; @@ -217,18 +233,6 @@ private: std::unique_ptr maxwell_dma; /// Inline memory engine std::unique_ptr kepler_memory; - - void ProcessBindMethod(const MethodCall& method_call); - void ProcessSemaphoreTriggerMethod(); - void ProcessSemaphoreRelease(); - void ProcessSemaphoreAcquire(); - - // Calls a GPU puller method. - void CallPullerMethod(const MethodCall& method_call); - // Calls a GPU engine method. - void CallEngineMethod(const MethodCall& method_call); - // Determines where the method should be executed. - bool ExecuteMethodOnEngine(const MethodCall& method_call); }; #define ASSERT_REG_POSITION(field_name, position) \ -- cgit v1.2.3 From ac51d048a91593a3da124aeea32dc5b0898f1dd6 Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 21 Jan 2019 15:18:09 -0500 Subject: gpu: Refactor command and swap buffers interface for asynch. --- src/video_core/gpu.cpp | 10 ++++++++++ src/video_core/gpu.h | 15 ++++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 08abf8ac9..b0f3310e5 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -65,6 +65,16 @@ const DmaPusher& GPU::DmaPusher() const { return *dma_pusher; } +void GPU::PushGPUEntries(Tegra::CommandList&& entries) { + dma_pusher->Push(std::move(entries)); + dma_pusher->DispatchCalls(); +} + +void GPU::SwapBuffers( + std::optional> framebuffer) { + renderer.SwapBuffers(std::move(framebuffer)); +} + u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { ASSERT(format != RenderTargetFormat::NONE); diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index ac7aec6a4..62649bd6e 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -200,6 +200,13 @@ public: std::array reg_array; }; } regs{}; + + /// Push GPU command entries to be processed + void PushGPUEntries(Tegra::CommandList&& entries); + + /// Swap buffers (render frame) + void SwapBuffers( + std::optional> framebuffer); private: void ProcessBindMethod(const MethodCall& method_call); @@ -207,11 +214,13 @@ private: void ProcessSemaphoreRelease(); void ProcessSemaphoreAcquire(); - // Calls a GPU puller method. + /// Calls a GPU puller method. void CallPullerMethod(const MethodCall& method_call); - // Calls a GPU engine method. + + /// Calls a GPU engine method. void CallEngineMethod(const MethodCall& method_call); - // Determines where the method should be executed. + + /// Determines where the method should be executed. bool ExecuteMethodOnEngine(const MethodCall& method_call); private: -- cgit v1.2.3 From 7b574f406b25c02a0e0efd8b7ec13d68ecb55497 Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 23 Jan 2019 22:17:55 -0500 Subject: gpu: Move command processing to another thread. --- src/video_core/CMakeLists.txt | 2 + src/video_core/engines/kepler_memory.cpp | 2 +- src/video_core/engines/maxwell_dma.cpp | 4 +- src/video_core/gpu.cpp | 44 ++++++++- src/video_core/gpu.h | 22 ++++- src/video_core/gpu_thread.cpp | 154 +++++++++++++++++++++++++++++++ src/video_core/gpu_thread.h | 135 +++++++++++++++++++++++++++ 7 files changed, 353 insertions(+), 10 deletions(-) create mode 100644 src/video_core/gpu_thread.cpp create mode 100644 src/video_core/gpu_thread.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3e9d2b3be..3bb5d0ed7 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -17,6 +17,8 @@ add_library(video_core STATIC engines/shader_header.h gpu.cpp gpu.h + gpu_thread.cpp + gpu_thread.h macro_interpreter.cpp macro_interpreter.h memory_manager.cpp diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 4f6126116..aae2a4019 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -48,7 +48,7 @@ void KeplerMemory::ProcessData(u32 data) { // We have to invalidate the destination region to evict any outdated surfaces from the cache. // We do this before actually writing the new data because the destination address might contain // a dirty surface that will have to be written back to memory. - rasterizer.InvalidateRegion(*dest_address, sizeof(u32)); + Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32)); Memory::Write32(*dest_address, data); system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 0474c7ba3..9dfea5999 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -92,12 +92,12 @@ void MaxwellDMA::HandleCopy() { const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated // copying. - rasterizer.FlushRegion(*source_cpu, src_size); + Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size); // We have to invalidate the destination region to evict any outdated surfaces from the // cache. We do this before actually writing the new data because the destination address // might contain a dirty surface that will have to be written back to memory. - rasterizer.InvalidateRegion(*dest_cpu, dst_size); + Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size); }; if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index b0f3310e5..0d7a052dd 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -6,12 +6,14 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/memory.h" +#include "core/settings.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/kepler_memory.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_dma.h" #include "video_core/gpu.h" +#include "video_core/gpu_thread.h" #include "video_core/renderer_base.h" namespace Tegra { @@ -37,6 +39,10 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren kepler_compute = std::make_unique(*memory_manager); maxwell_dma = std::make_unique(system, rasterizer, *memory_manager); kepler_memory = std::make_unique(system, rasterizer, *memory_manager); + + if (Settings::values.use_asynchronous_gpu_emulation) { + gpu_thread = std::make_unique(renderer, *dma_pusher); + } } GPU::~GPU() = default; @@ -66,13 +72,45 @@ const DmaPusher& GPU::DmaPusher() const { } void GPU::PushGPUEntries(Tegra::CommandList&& entries) { - dma_pusher->Push(std::move(entries)); - dma_pusher->DispatchCalls(); + if (Settings::values.use_asynchronous_gpu_emulation) { + gpu_thread->SubmitList(std::move(entries)); + } else { + dma_pusher->Push(std::move(entries)); + dma_pusher->DispatchCalls(); + } } void GPU::SwapBuffers( std::optional> framebuffer) { - renderer.SwapBuffers(std::move(framebuffer)); + if (Settings::values.use_asynchronous_gpu_emulation) { + gpu_thread->SwapBuffers(std::move(framebuffer)); + } else { + renderer.SwapBuffers(std::move(framebuffer)); + } +} + +void GPU::FlushRegion(VAddr addr, u64 size) { + if (Settings::values.use_asynchronous_gpu_emulation) { + gpu_thread->FlushRegion(addr, size); + } else { + renderer.Rasterizer().FlushRegion(addr, size); + } +} + +void GPU::InvalidateRegion(VAddr addr, u64 size) { + if (Settings::values.use_asynchronous_gpu_emulation) { + gpu_thread->InvalidateRegion(addr, size); + } else { + renderer.Rasterizer().InvalidateRegion(addr, size); + } +} + +void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { + if (Settings::values.use_asynchronous_gpu_emulation) { + gpu_thread->FlushAndInvalidateRegion(addr, size); + } else { + renderer.Rasterizer().FlushAndInvalidateRegion(addr, size); + } } u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 62649bd6e..3f3098bf1 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -19,6 +19,10 @@ namespace VideoCore { class RendererBase; } // namespace VideoCore +namespace VideoCommon::GPUThread { +class ThreadManager; +} // namespace VideoCommon::GPUThread + namespace Tegra { enum class RenderTargetFormat : u32 { @@ -200,7 +204,7 @@ public: std::array reg_array; }; } regs{}; - + /// Push GPU command entries to be processed void PushGPUEntries(Tegra::CommandList&& entries); @@ -208,6 +212,15 @@ public: void SwapBuffers( std::optional> framebuffer); + /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory + void FlushRegion(VAddr addr, u64 size); + + /// Notify rasterizer that any caches of the specified region should be invalidated + void InvalidateRegion(VAddr addr, u64 size); + + /// Notify rasterizer that any caches of the specified region should be flushed and invalidated + void FlushAndInvalidateRegion(VAddr addr, u64 size); + private: void ProcessBindMethod(const MethodCall& method_call); void ProcessSemaphoreTriggerMethod(); @@ -216,17 +229,18 @@ private: /// Calls a GPU puller method. void CallPullerMethod(const MethodCall& method_call); - + /// Calls a GPU engine method. void CallEngineMethod(const MethodCall& method_call); - + /// Determines where the method should be executed. bool ExecuteMethodOnEngine(const MethodCall& method_call); private: std::unique_ptr dma_pusher; std::unique_ptr memory_manager; - + std::unique_ptr gpu_thread; + VideoCore::RendererBase& renderer; /// Mapping of command subchannels to their bound engine ids. diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp new file mode 100644 index 000000000..22c4cca4d --- /dev/null +++ b/src/video_core/gpu_thread.cpp @@ -0,0 +1,154 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/microprofile.h" +#include "core/frontend/scope_acquire_window_context.h" +#include "core/settings.h" +#include "video_core/dma_pusher.h" +#include "video_core/gpu.h" +#include "video_core/gpu_thread.h" +#include "video_core/renderer_base.h" + +namespace VideoCommon::GPUThread { + +/// Executes a single GPU thread command +static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer, + Tegra::DmaPusher& dma_pusher) { + if (const auto submit_list = std::get_if(command)) { + dma_pusher.Push(std::move(submit_list->entries)); + dma_pusher.DispatchCalls(); + } else if (const auto data = std::get_if(command)) { + renderer.SwapBuffers(data->framebuffer); + } else if (const auto data = std::get_if(command)) { + renderer.Rasterizer().FlushRegion(data->addr, data->size); + } else if (const auto data = std::get_if(command)) { + renderer.Rasterizer().InvalidateRegion(data->addr, data->size); + } else if (const auto data = std::get_if(command)) { + renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size); + } else { + UNREACHABLE(); + } +} + +/// Runs the GPU thread +static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher, + SynchState& state) { + + MicroProfileOnThreadCreate("GpuThread"); + + auto WaitForWakeup = [&]() { + std::unique_lock lock{state.signal_mutex}; + state.signal_condition.wait(lock, [&] { return !state.IsIdle() || !state.is_running; }); + }; + + // Wait for first GPU command before acquiring the window context + WaitForWakeup(); + + // If emulation was stopped during disk shader loading, abort before trying to acquire context + if (!state.is_running) { + return; + } + + Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()}; + + while (state.is_running) { + if (!state.is_running) { + return; + } + + { + // Thread has been woken up, so make the previous write queue the next read queue + std::lock_guard lock{state.signal_mutex}; + std::swap(state.push_queue, state.pop_queue); + } + + // Execute all of the GPU commands + while (!state.pop_queue->empty()) { + ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher); + state.pop_queue->pop(); + } + + // Signal that the GPU thread has finished processing commands + if (state.IsIdle()) { + state.idle_condition.notify_one(); + } + + // Wait for CPU thread to send more GPU commands + WaitForWakeup(); + } +} + +ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) + : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer), + std::ref(dma_pusher), std::ref(state)}, + thread_id{thread.get_id()} {} + +ThreadManager::~ThreadManager() { + { + // Notify GPU thread that a shutdown is pending + std::lock_guard lock{state.signal_mutex}; + state.is_running = false; + } + + state.signal_condition.notify_one(); + thread.join(); +} + +void ThreadManager::SubmitList(Tegra::CommandList&& entries) { + if (entries.empty()) { + return; + } + + PushCommand(SubmitListCommand(std::move(entries)), false, false); +} + +void ThreadManager::SwapBuffers( + std::optional> framebuffer) { + PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false); +} + +void ThreadManager::FlushRegion(VAddr addr, u64 size) { + if (Settings::values.use_accurate_gpu_emulation) { + PushCommand(FlushRegionCommand(addr, size), true, false); + } +} + +void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { + PushCommand(InvalidateRegionCommand(addr, size), true, true); +} + +void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { + if (Settings::values.use_accurate_gpu_emulation) { + PushCommand(FlushAndInvalidateRegionCommand(addr, size), true, false); + } else { + InvalidateRegion(addr, size); + } +} + +void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) { + { + std::lock_guard lock{state.signal_mutex}; + + if ((allow_on_cpu && state.IsIdle()) || IsGpuThread()) { + // Execute the command synchronously on the current thread + ExecuteCommand(&command_data, renderer, dma_pusher); + return; + } + + // Push the command to the GPU thread + state.push_queue->emplace(command_data); + } + + // Signal the GPU thread that commands are pending + state.signal_condition.notify_one(); + + if (wait_for_idle) { + // Wait for the GPU to be idle (all commands to be executed) + std::unique_lock lock{state.idle_mutex}; + state.idle_condition.wait(lock, [this] { return state.IsIdle(); }); + } +} + +} // namespace VideoCommon::GPUThread diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h new file mode 100644 index 000000000..ad9f9462b --- /dev/null +++ b/src/video_core/gpu_thread.h @@ -0,0 +1,135 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Tegra { +struct FramebufferConfig; +class DmaPusher; +} // namespace Tegra + +namespace VideoCore { +class RendererBase; +} // namespace VideoCore + +namespace VideoCommon::GPUThread { + +/// Command to signal to the GPU thread that a command list is ready for processing +struct SubmitListCommand final { + explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {} + + Tegra::CommandList entries; +}; + +/// Command to signal to the GPU thread that a swap buffers is pending +struct SwapBuffersCommand final { + explicit SwapBuffersCommand(std::optional framebuffer) + : framebuffer{std::move(framebuffer)} {} + + std::optional framebuffer; +}; + +/// Command to signal to the GPU thread to flush a region +struct FlushRegionCommand final { + explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} + + const VAddr addr; + const u64 size; +}; + +/// Command to signal to the GPU thread to invalidate a region +struct InvalidateRegionCommand final { + explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} + + const VAddr addr; + const u64 size; +}; + +/// Command to signal to the GPU thread to flush and invalidate a region +struct FlushAndInvalidateRegionCommand final { + explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size) + : addr{addr}, size{size} {} + + const VAddr addr; + const u64 size; +}; + +using CommandData = std::variant; + +/// Struct used to synchronize the GPU thread +struct SynchState final { + std::atomic is_running{true}; + std::condition_variable signal_condition; + std::mutex signal_mutex; + std::condition_variable idle_condition; + std::mutex idle_mutex; + + // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and + // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes + // empty. This allows for efficient thread-safe access, as it does not require any copies. + + using CommandQueue = std::queue; + std::array command_queues; + CommandQueue* push_queue{&command_queues[0]}; + CommandQueue* pop_queue{&command_queues[1]}; + + /// Returns true if the GPU thread should be idle, meaning there are no commands to process + bool IsIdle() const { + return command_queues[0].empty() && command_queues[1].empty(); + } +}; + +/// Class used to manage the GPU thread +class ThreadManager final { +public: + explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher); + ~ThreadManager(); + + /// Push GPU command entries to be processed + void SubmitList(Tegra::CommandList&& entries); + + /// Swap buffers (render frame) + void SwapBuffers( + std::optional> framebuffer); + + /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory + void FlushRegion(VAddr addr, u64 size); + + /// Notify rasterizer that any caches of the specified region should be invalidated + void InvalidateRegion(VAddr addr, u64 size); + + /// Notify rasterizer that any caches of the specified region should be flushed and invalidated + void FlushAndInvalidateRegion(VAddr addr, u64 size); + + /// Waits the caller until the GPU thread is idle, used for synchronization + void WaitForIdle(); + +private: + /// Pushes a command to be executed by the GPU thread + void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu); + + /// Returns true if this is called by the GPU thread + bool IsGpuThread() const { + return std::this_thread::get_id() == thread_id; + } + +private: + SynchState state; + std::thread thread; + std::thread::id thread_id; + VideoCore::RendererBase& renderer; + Tegra::DmaPusher& dma_pusher; +}; + +} // namespace VideoCommon::GPUThread -- cgit v1.2.3 From aaa373585cd55bd03fcc589d2ad9f749e2cb99d4 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 8 Feb 2019 23:21:53 -0500 Subject: gpu: Refactor a/synchronous implementations into their own classes. --- src/video_core/CMakeLists.txt | 4 ++++ src/video_core/gpu.cpp | 48 ------------------------------------------- src/video_core/gpu.h | 26 ++++++++++------------- src/video_core/gpu_asynch.cpp | 37 +++++++++++++++++++++++++++++++++ src/video_core/gpu_asynch.h | 37 +++++++++++++++++++++++++++++++++ src/video_core/gpu_synch.cpp | 37 +++++++++++++++++++++++++++++++++ src/video_core/gpu_synch.h | 29 ++++++++++++++++++++++++++ 7 files changed, 155 insertions(+), 63 deletions(-) create mode 100644 src/video_core/gpu_asynch.cpp create mode 100644 src/video_core/gpu_asynch.h create mode 100644 src/video_core/gpu_synch.cpp create mode 100644 src/video_core/gpu_synch.h (limited to 'src/video_core') diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3bb5d0ed7..a4cb33c17 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -17,6 +17,10 @@ add_library(video_core STATIC engines/shader_header.h gpu.cpp gpu.h + gpu_asynch.cpp + gpu_asynch.h + gpu_synch.cpp + gpu_synch.h gpu_thread.cpp gpu_thread.h macro_interpreter.cpp diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 0d7a052dd..08abf8ac9 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -6,14 +6,12 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/memory.h" -#include "core/settings.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/kepler_memory.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_dma.h" #include "video_core/gpu.h" -#include "video_core/gpu_thread.h" #include "video_core/renderer_base.h" namespace Tegra { @@ -39,10 +37,6 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren kepler_compute = std::make_unique(*memory_manager); maxwell_dma = std::make_unique(system, rasterizer, *memory_manager); kepler_memory = std::make_unique(system, rasterizer, *memory_manager); - - if (Settings::values.use_asynchronous_gpu_emulation) { - gpu_thread = std::make_unique(renderer, *dma_pusher); - } } GPU::~GPU() = default; @@ -71,48 +65,6 @@ const DmaPusher& GPU::DmaPusher() const { return *dma_pusher; } -void GPU::PushGPUEntries(Tegra::CommandList&& entries) { - if (Settings::values.use_asynchronous_gpu_emulation) { - gpu_thread->SubmitList(std::move(entries)); - } else { - dma_pusher->Push(std::move(entries)); - dma_pusher->DispatchCalls(); - } -} - -void GPU::SwapBuffers( - std::optional> framebuffer) { - if (Settings::values.use_asynchronous_gpu_emulation) { - gpu_thread->SwapBuffers(std::move(framebuffer)); - } else { - renderer.SwapBuffers(std::move(framebuffer)); - } -} - -void GPU::FlushRegion(VAddr addr, u64 size) { - if (Settings::values.use_asynchronous_gpu_emulation) { - gpu_thread->FlushRegion(addr, size); - } else { - renderer.Rasterizer().FlushRegion(addr, size); - } -} - -void GPU::InvalidateRegion(VAddr addr, u64 size) { - if (Settings::values.use_asynchronous_gpu_emulation) { - gpu_thread->InvalidateRegion(addr, size); - } else { - renderer.Rasterizer().InvalidateRegion(addr, size); - } -} - -void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { - if (Settings::values.use_asynchronous_gpu_emulation) { - gpu_thread->FlushAndInvalidateRegion(addr, size); - } else { - renderer.Rasterizer().FlushAndInvalidateRegion(addr, size); - } -} - u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { ASSERT(format != RenderTargetFormat::NONE); diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 3f3098bf1..14a421cc1 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -19,10 +19,6 @@ namespace VideoCore { class RendererBase; } // namespace VideoCore -namespace VideoCommon::GPUThread { -class ThreadManager; -} // namespace VideoCommon::GPUThread - namespace Tegra { enum class RenderTargetFormat : u32 { @@ -123,7 +119,7 @@ enum class EngineID { MAXWELL_DMA_COPY_A = 0xB0B5, }; -class GPU final { +class GPU { public: explicit GPU(Core::System& system, VideoCore::RendererBase& renderer); @@ -206,20 +202,20 @@ public: } regs{}; /// Push GPU command entries to be processed - void PushGPUEntries(Tegra::CommandList&& entries); + virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; /// Swap buffers (render frame) - void SwapBuffers( - std::optional> framebuffer); + virtual void SwapBuffers( + std::optional> framebuffer) = 0; /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - void FlushRegion(VAddr addr, u64 size); + virtual void FlushRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be invalidated - void InvalidateRegion(VAddr addr, u64 size); + virtual void InvalidateRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be flushed and invalidated - void FlushAndInvalidateRegion(VAddr addr, u64 size); + virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; private: void ProcessBindMethod(const MethodCall& method_call); @@ -236,13 +232,13 @@ private: /// Determines where the method should be executed. bool ExecuteMethodOnEngine(const MethodCall& method_call); -private: +protected: std::unique_ptr dma_pusher; - std::unique_ptr memory_manager; - std::unique_ptr gpu_thread; - VideoCore::RendererBase& renderer; +private: + std::unique_ptr memory_manager; + /// Mapping of command subchannels to their bound engine ids. std::array bound_engines = {}; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp new file mode 100644 index 000000000..ad0a747e3 --- /dev/null +++ b/src/video_core/gpu_asynch.cpp @@ -0,0 +1,37 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/gpu_asynch.h" +#include "video_core/gpu_thread.h" +#include "video_core/renderer_base.h" + +namespace VideoCommon { + +GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) + : Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {} + +GPUAsynch::~GPUAsynch() = default; + +void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { + gpu_thread.SubmitList(std::move(entries)); +} + +void GPUAsynch::SwapBuffers( + std::optional> framebuffer) { + gpu_thread.SwapBuffers(std::move(framebuffer)); +} + +void GPUAsynch::FlushRegion(VAddr addr, u64 size) { + gpu_thread.FlushRegion(addr, size); +} + +void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) { + gpu_thread.InvalidateRegion(addr, size); +} + +void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { + gpu_thread.FlushAndInvalidateRegion(addr, size); +} + +} // namespace VideoCommon diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h new file mode 100644 index 000000000..58046f3e9 --- /dev/null +++ b/src/video_core/gpu_asynch.h @@ -0,0 +1,37 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/gpu.h" +#include "video_core/gpu_thread.h" + +namespace VideoCore { +class RendererBase; +} // namespace VideoCore + +namespace VideoCommon { + +namespace GPUThread { +class ThreadManager; +} // namespace GPUThread + +/// Implementation of GPU interface that runs the GPU asynchronously +class GPUAsynch : public Tegra::GPU { +public: + explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer); + ~GPUAsynch(); + + void PushGPUEntries(Tegra::CommandList&& entries) override; + void SwapBuffers( + std::optional> framebuffer) override; + void FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; + +private: + GPUThread::ThreadManager gpu_thread; +}; + +} // namespace VideoCommon diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp new file mode 100644 index 000000000..4c00b96c7 --- /dev/null +++ b/src/video_core/gpu_synch.cpp @@ -0,0 +1,37 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/gpu_synch.h" +#include "video_core/renderer_base.h" + +namespace VideoCommon { + +GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) + : Tegra::GPU(system, renderer) {} + +GPUSynch::~GPUSynch() = default; + +void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { + dma_pusher->Push(std::move(entries)); + dma_pusher->DispatchCalls(); +} + +void GPUSynch::SwapBuffers( + std::optional> framebuffer) { + renderer.SwapBuffers(std::move(framebuffer)); +} + +void GPUSynch::FlushRegion(VAddr addr, u64 size) { + renderer.Rasterizer().FlushRegion(addr, size); +} + +void GPUSynch::InvalidateRegion(VAddr addr, u64 size) { + renderer.Rasterizer().InvalidateRegion(addr, size); +} + +void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { + renderer.Rasterizer().FlushAndInvalidateRegion(addr, size); +} + +} // namespace VideoCommon diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h new file mode 100644 index 000000000..658f683e2 --- /dev/null +++ b/src/video_core/gpu_synch.h @@ -0,0 +1,29 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/gpu.h" + +namespace VideoCore { +class RendererBase; +} // namespace VideoCore + +namespace VideoCommon { + +/// Implementation of GPU interface that runs the GPU synchronously +class GPUSynch : public Tegra::GPU { +public: + explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer); + ~GPUSynch(); + + void PushGPUEntries(Tegra::CommandList&& entries) override; + void SwapBuffers( + std::optional> framebuffer) override; + void FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; +}; + +} // namespace VideoCommon -- cgit v1.2.3 From 3f1b4fb23ad7e689941b5a01afa15780bc50b77b Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 9 Feb 2019 00:06:35 -0500 Subject: gpu: Always flush. --- src/video_core/gpu_thread.cpp | 13 +++++-------- src/video_core/renderer_opengl/gl_rasterizer.cpp | 6 +----- 2 files changed, 6 insertions(+), 13 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 22c4cca4d..7640da6c0 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -110,9 +110,8 @@ void ThreadManager::SwapBuffers( } void ThreadManager::FlushRegion(VAddr addr, u64 size) { - if (Settings::values.use_accurate_gpu_emulation) { - PushCommand(FlushRegionCommand(addr, size), true, false); - } + // Block the CPU when using accurate emulation + PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false); } void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { @@ -120,11 +119,9 @@ void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { } void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { - if (Settings::values.use_accurate_gpu_emulation) { - PushCommand(FlushAndInvalidateRegionCommand(addr, size), true, false); - } else { - InvalidateRegion(addr, size); - } + // Block the CPU when using accurate emulation + PushCommand(FlushAndInvalidateRegionCommand(addr, size), + Settings::values.use_accurate_gpu_emulation, false); } void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 321d9dd3d..168288088 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -749,11 +749,7 @@ void RasterizerOpenGL::FlushAll() {} void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - - if (Settings::values.use_accurate_gpu_emulation) { - // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit - res_cache.FlushRegion(addr, size); - } + res_cache.FlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { -- cgit v1.2.3 From 63aa08acbe1da5b78d9f0b37088081a24591849f Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 9 Feb 2019 02:55:45 -0500 Subject: gpu_thread: (HACK) Ignore flush on FlushAndInvalidateRegion. --- src/video_core/gpu_thread.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 7640da6c0..4c25380eb 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -119,9 +119,7 @@ void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { } void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { - // Block the CPU when using accurate emulation - PushCommand(FlushAndInvalidateRegionCommand(addr, size), - Settings::values.use_accurate_gpu_emulation, false); + InvalidateRegion(addr, size); } void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) { -- cgit v1.2.3 From 84ad81ee6798ece6c66016c4581b5fe57ce7b20e Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 9 Feb 2019 11:37:11 -0500 Subject: gpu_thread: Fix deadlock with threading idle state check. --- src/video_core/gpu_thread.cpp | 11 +++++++---- src/video_core/gpu_thread.h | 7 ++++--- 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 4c25380eb..c5bdd2a17 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -40,7 +40,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p auto WaitForWakeup = [&]() { std::unique_lock lock{state.signal_mutex}; - state.signal_condition.wait(lock, [&] { return !state.IsIdle() || !state.is_running; }); + state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; }); }; // Wait for first GPU command before acquiring the window context @@ -70,8 +70,10 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p state.pop_queue->pop(); } + state.UpdateIdleState(); + // Signal that the GPU thread has finished processing commands - if (state.IsIdle()) { + if (state.is_idle) { state.idle_condition.notify_one(); } @@ -126,13 +128,14 @@ void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, { std::lock_guard lock{state.signal_mutex}; - if ((allow_on_cpu && state.IsIdle()) || IsGpuThread()) { + if ((allow_on_cpu && state.is_idle) || IsGpuThread()) { // Execute the command synchronously on the current thread ExecuteCommand(&command_data, renderer, dma_pusher); return; } // Push the command to the GPU thread + state.UpdateIdleState(); state.push_queue->emplace(command_data); } @@ -142,7 +145,7 @@ void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, if (wait_for_idle) { // Wait for the GPU to be idle (all commands to be executed) std::unique_lock lock{state.idle_mutex}; - state.idle_condition.wait(lock, [this] { return state.IsIdle(); }); + state.idle_condition.wait(lock, [this] { return static_cast(state.is_idle); }); } } diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index ad9f9462b..2ad8214cc 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -70,6 +70,7 @@ using CommandData = std::variant is_running{true}; + std::atomic is_idle{true}; std::condition_variable signal_condition; std::mutex signal_mutex; std::condition_variable idle_condition; @@ -84,9 +85,9 @@ struct SynchState final { CommandQueue* push_queue{&command_queues[0]}; CommandQueue* pop_queue{&command_queues[1]}; - /// Returns true if the GPU thread should be idle, meaning there are no commands to process - bool IsIdle() const { - return command_queues[0].empty() && command_queues[1].empty(); + void UpdateIdleState() { + std::lock_guard lock{idle_mutex}; + is_idle = command_queues[0].empty() && command_queues[1].empty(); } }; -- cgit v1.2.3