diff options
26 files changed, 529 insertions, 52 deletions
diff --git a/src/core/core.cpp b/src/core/core.cpp index d741ef90d..eba2177d1 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -36,7 +36,8 @@ #include "frontend/applets/software_keyboard.h" #include "frontend/applets/web_browser.h" #include "video_core/debug_utils/debug_utils.h" -#include "video_core/gpu.h" +#include "video_core/gpu_asynch.h" +#include "video_core/gpu_synch.h" #include "video_core/renderer_base.h" #include "video_core/video_core.h" @@ -129,10 +130,16 @@ struct System::Impl { return ResultStatus::ErrorVideoCore; } - gpu_core = std::make_unique<Tegra::GPU>(system, renderer->Rasterizer()); + is_powered_on = true; + + if (Settings::values.use_asynchronous_gpu_emulation) { + gpu_core = std::make_unique<VideoCommon::GPUAsynch>(system, *renderer); + } else { + gpu_core = std::make_unique<VideoCommon::GPUSynch>(system, *renderer); + } cpu_core_manager.Initialize(system); - is_powered_on = true; + LOG_DEBUG(Core, "Initialized OK"); // Reset counters and set time origin to current frame diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index dbe7ee6e8..20c7c39aa 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -36,7 +36,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3 auto& instance = Core::System::GetInstance(); instance.GetPerfStats().EndGameFrame(); - instance.Renderer().SwapBuffers(framebuffer); + instance.GPU().SwapBuffers(framebuffer); } } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 466db7ccd..a34b9e753 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp @@ -178,7 +178,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou auto& gpu = 
system_instance.GPU(); auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset); ASSERT(cpu_addr); - system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size); + gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size); params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 0a650f36c..8ce7bc7a5 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp @@ -136,16 +136,6 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector<u8>& input, std::vector< return 0; } -static void PushGPUEntries(Tegra::CommandList&& entries) { - if (entries.empty()) { - return; - } - - auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()}; - dma_pusher.Push(std::move(entries)); - dma_pusher.DispatchCalls(); -} - u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& output) { if (input.size() < sizeof(IoctlSubmitGpfifo)) { UNIMPLEMENTED(); @@ -163,7 +153,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], params.num_entries * sizeof(Tegra::CommandListHeader)); - PushGPUEntries(std::move(entries)); + Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); params.fence_out.id = 0; params.fence_out.value = 0; @@ -184,7 +174,7 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output) Memory::ReadBlock(params.address, entries.data(), params.num_entries * sizeof(Tegra::CommandListHeader)); - PushGPUEntries(std::move(entries)); + Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); params.fence_out.id = 0; params.fence_out.value = 0; diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 
56f31e2ac..fc496b654 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -186,7 +186,7 @@ void NVFlinger::Compose() { // There was no queued buffer to draw, render previous frame system_instance.GetPerfStats().EndGameFrame(); - system_instance.Renderer().SwapBuffers({}); + system_instance.GPU().SwapBuffers({}); continue; } diff --git a/src/core/memory.cpp b/src/core/memory.cpp index ec279cef8..6591c45d2 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -356,16 +356,16 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) { const VAddr overlap_end = std::min(end, region_end); const VAddr overlap_size = overlap_end - overlap_start; - auto& rasterizer = system_instance.Renderer().Rasterizer(); + auto& gpu = system_instance.GPU(); switch (mode) { case FlushMode::Flush: - rasterizer.FlushRegion(overlap_start, overlap_size); + gpu.FlushRegion(overlap_start, overlap_size); break; case FlushMode::Invalidate: - rasterizer.InvalidateRegion(overlap_start, overlap_size); + gpu.InvalidateRegion(overlap_start, overlap_size); break; case FlushMode::FlushAndInvalidate: - rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size); + gpu.FlushAndInvalidateRegion(overlap_start, overlap_size); break; } }; diff --git a/src/core/settings.h b/src/core/settings.h index 7e76e0466..cdfb2f742 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -393,6 +393,7 @@ struct Values { u16 frame_limit; bool use_disk_shader_cache; bool use_accurate_gpu_emulation; + bool use_asynchronous_gpu_emulation; float bg_red; float bg_green; diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 58dfcc4df..e1db06811 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -162,6 +162,8 @@ TelemetrySession::TelemetrySession() { Settings::values.use_disk_shader_cache); AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation", 
Settings::values.use_accurate_gpu_emulation); + AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAsynchronousGpuEmulation", + Settings::values.use_asynchronous_gpu_emulation); AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode", Settings::values.use_docked_mode); } diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index c1ae83f4d..57f31cd58 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -17,6 +17,12 @@ add_library(video_core STATIC engines/shader_header.h gpu.cpp gpu.h + gpu_asynch.cpp + gpu_asynch.h + gpu_synch.cpp + gpu_synch.h + gpu_thread.cpp + gpu_thread.h macro_interpreter.cpp macro_interpreter.h memory_manager.cpp diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 4f6126116..aae2a4019 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -48,7 +48,7 @@ void KeplerMemory::ProcessData(u32 data) { // We have to invalidate the destination region to evict any outdated surfaces from the cache. // We do this before actually writing the new data because the destination address might contain // a dirty surface that will have to be written back to memory. - rasterizer.InvalidateRegion(*dest_address, sizeof(u32)); + Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32)); Memory::Write32(*dest_address, data); system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 0474c7ba3..9dfea5999 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -92,12 +92,12 @@ void MaxwellDMA::HandleCopy() { const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated // copying. 
- rasterizer.FlushRegion(*source_cpu, src_size); + Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size); // We have to invalidate the destination region to evict any outdated surfaces from the // cache. We do this before actually writing the new data because the destination address // might contain a dirty surface that will have to be written back to memory. - rasterizer.InvalidateRegion(*dest_cpu, dst_size); + Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size); }; if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index ac30d1a89..08abf8ac9 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -12,7 +12,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_dma.h" #include "video_core/gpu.h" -#include "video_core/rasterizer_interface.h" +#include "video_core/renderer_base.h" namespace Tegra { @@ -28,7 +28,8 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { UNREACHABLE(); } -GPU::GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer) { +GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { + auto& rasterizer{renderer.Rasterizer()}; memory_manager = std::make_unique<Tegra::MemoryManager>(); dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 6313702f2..14a421cc1 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -16,8 +16,8 @@ class System; } namespace VideoCore { -class RasterizerInterface; -} +class RendererBase; +} // namespace VideoCore namespace Tegra { @@ -119,9 +119,10 @@ enum class EngineID { MAXWELL_DMA_COPY_A = 0xB0B5, }; -class GPU final { +class GPU { public: - explicit GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer); + explicit GPU(Core::System& system, 
VideoCore::RendererBase& renderer); + ~GPU(); struct MethodCall { @@ -200,8 +201,42 @@ public: }; } regs{}; + /// Push GPU command entries to be processed + virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; + + /// Swap buffers (render frame) + virtual void SwapBuffers( + std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0; + + /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory + virtual void FlushRegion(VAddr addr, u64 size) = 0; + + /// Notify rasterizer that any caches of the specified region should be invalidated + virtual void InvalidateRegion(VAddr addr, u64 size) = 0; + + /// Notify rasterizer that any caches of the specified region should be flushed and invalidated + virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; + private: + void ProcessBindMethod(const MethodCall& method_call); + void ProcessSemaphoreTriggerMethod(); + void ProcessSemaphoreRelease(); + void ProcessSemaphoreAcquire(); + + /// Calls a GPU puller method. + void CallPullerMethod(const MethodCall& method_call); + + /// Calls a GPU engine method. + void CallEngineMethod(const MethodCall& method_call); + + /// Determines where the method should be executed. + bool ExecuteMethodOnEngine(const MethodCall& method_call); + +protected: std::unique_ptr<Tegra::DmaPusher> dma_pusher; + VideoCore::RendererBase& renderer; + +private: std::unique_ptr<Tegra::MemoryManager> memory_manager; /// Mapping of command subchannels to their bound engine ids. @@ -217,18 +252,6 @@ private: std::unique_ptr<Engines::MaxwellDMA> maxwell_dma; /// Inline memory engine std::unique_ptr<Engines::KeplerMemory> kepler_memory; - - void ProcessBindMethod(const MethodCall& method_call); - void ProcessSemaphoreTriggerMethod(); - void ProcessSemaphoreRelease(); - void ProcessSemaphoreAcquire(); - - // Calls a GPU puller method. - void CallPullerMethod(const MethodCall& method_call); - // Calls a GPU engine method. 
- void CallEngineMethod(const MethodCall& method_call); - // Determines where the method should be executed. - bool ExecuteMethodOnEngine(const MethodCall& method_call); }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp new file mode 100644 index 000000000..ad0a747e3 --- /dev/null +++ b/src/video_core/gpu_asynch.cpp @@ -0,0 +1,37 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/gpu_asynch.h" +#include "video_core/gpu_thread.h" +#include "video_core/renderer_base.h" + +namespace VideoCommon { + +GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) + : Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {} + +GPUAsynch::~GPUAsynch() = default; + +void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { + gpu_thread.SubmitList(std::move(entries)); +} + +void GPUAsynch::SwapBuffers( + std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { + gpu_thread.SwapBuffers(std::move(framebuffer)); +} + +void GPUAsynch::FlushRegion(VAddr addr, u64 size) { + gpu_thread.FlushRegion(addr, size); +} + +void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) { + gpu_thread.InvalidateRegion(addr, size); +} + +void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { + gpu_thread.FlushAndInvalidateRegion(addr, size); +} + +} // namespace VideoCommon diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h new file mode 100644 index 000000000..58046f3e9 --- /dev/null +++ b/src/video_core/gpu_asynch.h @@ -0,0 +1,37 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "video_core/gpu.h" +#include "video_core/gpu_thread.h" + +namespace VideoCore { +class RendererBase; +} // namespace VideoCore + +namespace VideoCommon { + +namespace GPUThread { +class ThreadManager; +} // namespace GPUThread + +/// Implementation of GPU interface that runs the GPU asynchronously +class GPUAsynch : public Tegra::GPU { +public: + explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer); + ~GPUAsynch(); + + void PushGPUEntries(Tegra::CommandList&& entries) override; + void SwapBuffers( + std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; + void FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; + +private: + GPUThread::ThreadManager gpu_thread; +}; + +} // namespace VideoCommon diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp new file mode 100644 index 000000000..4c00b96c7 --- /dev/null +++ b/src/video_core/gpu_synch.cpp @@ -0,0 +1,37 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "video_core/gpu_synch.h" +#include "video_core/renderer_base.h" + +namespace VideoCommon { + +GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) + : Tegra::GPU(system, renderer) {} + +GPUSynch::~GPUSynch() = default; + +void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { + dma_pusher->Push(std::move(entries)); + dma_pusher->DispatchCalls(); +} + +void GPUSynch::SwapBuffers( + std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { + renderer.SwapBuffers(std::move(framebuffer)); +} + +void GPUSynch::FlushRegion(VAddr addr, u64 size) { + renderer.Rasterizer().FlushRegion(addr, size); +} + +void GPUSynch::InvalidateRegion(VAddr addr, u64 size) { + renderer.Rasterizer().InvalidateRegion(addr, size); +} + +void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { + renderer.Rasterizer().FlushAndInvalidateRegion(addr, size); +} + +} // namespace VideoCommon diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h new file mode 100644 index 000000000..658f683e2 --- /dev/null +++ b/src/video_core/gpu_synch.h @@ -0,0 +1,29 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include "video_core/gpu.h" + +namespace VideoCore { +class RendererBase; +} // namespace VideoCore + +namespace VideoCommon { + +/// Implementation of GPU interface that runs the GPU synchronously +class GPUSynch : public Tegra::GPU { +public: + explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer); + ~GPUSynch(); + + void PushGPUEntries(Tegra::CommandList&& entries) override; + void SwapBuffers( + std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; + void FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; +}; + +} // namespace VideoCommon diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp new file mode 100644 index 000000000..c5bdd2a17 --- /dev/null +++ b/src/video_core/gpu_thread.cpp @@ -0,0 +1,152 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include "common/assert.h" +#include "common/microprofile.h" +#include "core/frontend/scope_acquire_window_context.h" +#include "core/settings.h" +#include "video_core/dma_pusher.h" +#include "video_core/gpu.h" +#include "video_core/gpu_thread.h" +#include "video_core/renderer_base.h" + +namespace VideoCommon::GPUThread { + +/// Executes a single GPU thread command +static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer, + Tegra::DmaPusher& dma_pusher) { + if (const auto submit_list = std::get_if<SubmitListCommand>(command)) { + dma_pusher.Push(std::move(submit_list->entries)); + dma_pusher.DispatchCalls(); + } else if (const auto data = std::get_if<SwapBuffersCommand>(command)) { + renderer.SwapBuffers(data->framebuffer); + } else if (const auto data = std::get_if<FlushRegionCommand>(command)) { + renderer.Rasterizer().FlushRegion(data->addr, data->size); + } else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) { + renderer.Rasterizer().InvalidateRegion(data->addr, data->size); + } else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) { + renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size); + } else { + UNREACHABLE(); + } +} + +/// Runs the GPU thread +static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher, + SynchState& state) { + + MicroProfileOnThreadCreate("GpuThread"); + + auto WaitForWakeup = [&]() { + std::unique_lock<std::mutex> lock{state.signal_mutex}; + state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; }); + }; + + // Wait for first GPU command before acquiring the window context + WaitForWakeup(); + + // If emulation was stopped during disk shader loading, abort before trying to acquire context + if (!state.is_running) { + return; + } + + Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()}; + + while (state.is_running) { + if (!state.is_running) { + return; 
+ } + + { + // Thread has been woken up, so make the previous write queue the next read queue + std::lock_guard<std::mutex> lock{state.signal_mutex}; + std::swap(state.push_queue, state.pop_queue); + } + + // Execute all of the GPU commands + while (!state.pop_queue->empty()) { + ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher); + state.pop_queue->pop(); + } + + state.UpdateIdleState(); + + // Signal that the GPU thread has finished processing commands + if (state.is_idle) { + state.idle_condition.notify_one(); + } + + // Wait for CPU thread to send more GPU commands + WaitForWakeup(); + } +} + +ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) + : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer), + std::ref(dma_pusher), std::ref(state)}, + thread_id{thread.get_id()} {} + +ThreadManager::~ThreadManager() { + { + // Notify GPU thread that a shutdown is pending + std::lock_guard<std::mutex> lock{state.signal_mutex}; + state.is_running = false; + } + + state.signal_condition.notify_one(); + thread.join(); +} + +void ThreadManager::SubmitList(Tegra::CommandList&& entries) { + if (entries.empty()) { + return; + } + + PushCommand(SubmitListCommand(std::move(entries)), false, false); +} + +void ThreadManager::SwapBuffers( + std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { + PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false); +} + +void ThreadManager::FlushRegion(VAddr addr, u64 size) { + // Block the CPU when using accurate emulation + PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false); +} + +void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { + PushCommand(InvalidateRegionCommand(addr, size), true, true); +} + +void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { + InvalidateRegion(addr, size); +} + +void ThreadManager::PushCommand(CommandData&& command_data, 
bool wait_for_idle, bool allow_on_cpu) { + { + std::lock_guard<std::mutex> lock{state.signal_mutex}; + + if ((allow_on_cpu && state.is_idle) || IsGpuThread()) { + // Execute the command synchronously on the current thread + ExecuteCommand(&command_data, renderer, dma_pusher); + return; + } + + // Push the command to the GPU thread + state.UpdateIdleState(); + state.push_queue->emplace(command_data); + } + + // Signal the GPU thread that commands are pending + state.signal_condition.notify_one(); + + if (wait_for_idle) { + // Wait for the GPU to be idle (all commands to be executed) + std::unique_lock<std::mutex> lock{state.idle_mutex}; + state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); }); + } +} + +} // namespace VideoCommon::GPUThread diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h new file mode 100644 index 000000000..2ad8214cc --- /dev/null +++ b/src/video_core/gpu_thread.h @@ -0,0 +1,136 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <array> +#include <atomic> +#include <condition_variable> +#include <memory> +#include <mutex> +#include <optional> +#include <thread> +#include <variant> + +namespace Tegra { +struct FramebufferConfig; +class DmaPusher; +} // namespace Tegra + +namespace VideoCore { +class RendererBase; +} // namespace VideoCore + +namespace VideoCommon::GPUThread { + +/// Command to signal to the GPU thread that a command list is ready for processing +struct SubmitListCommand final { + explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {} + + Tegra::CommandList entries; +}; + +/// Command to signal to the GPU thread that a swap buffers is pending +struct SwapBuffersCommand final { + explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer) + : framebuffer{std::move(framebuffer)} {} + + std::optional<const Tegra::FramebufferConfig> framebuffer; +}; + +/// Command to signal to the GPU thread to flush a region +struct FlushRegionCommand final { + explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} + + const VAddr addr; + const u64 size; +}; + +/// Command to signal to the GPU thread to invalidate a region +struct InvalidateRegionCommand final { + explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} + + const VAddr addr; + const u64 size; +}; + +/// Command to signal to the GPU thread to flush and invalidate a region +struct FlushAndInvalidateRegionCommand final { + explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size) + : addr{addr}, size{size} {} + + const VAddr addr; + const u64 size; +}; + +using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, + InvalidateRegionCommand, FlushAndInvalidateRegionCommand>; + +/// Struct used to synchronize the GPU thread +struct SynchState final { + std::atomic<bool> is_running{true}; + std::atomic<bool> is_idle{true}; + 
std::condition_variable signal_condition; + std::mutex signal_mutex; + std::condition_variable idle_condition; + std::mutex idle_mutex; + + // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and + // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes + // empty. This allows for efficient thread-safe access, as it does not require any copies. + + using CommandQueue = std::queue<CommandData>; + std::array<CommandQueue, 2> command_queues; + CommandQueue* push_queue{&command_queues[0]}; + CommandQueue* pop_queue{&command_queues[1]}; + + void UpdateIdleState() { + std::lock_guard<std::mutex> lock{idle_mutex}; + is_idle = command_queues[0].empty() && command_queues[1].empty(); + } +}; + +/// Class used to manage the GPU thread +class ThreadManager final { +public: + explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher); + ~ThreadManager(); + + /// Push GPU command entries to be processed + void SubmitList(Tegra::CommandList&& entries); + + /// Swap buffers (render frame) + void SwapBuffers( + std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer); + + /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory + void FlushRegion(VAddr addr, u64 size); + + /// Notify rasterizer that any caches of the specified region should be invalidated + void InvalidateRegion(VAddr addr, u64 size); + + /// Notify rasterizer that any caches of the specified region should be flushed and invalidated + void FlushAndInvalidateRegion(VAddr addr, u64 size); + + /// Waits the caller until the GPU thread is idle, used for synchronization + void WaitForIdle(); + +private: + /// Pushes a command to be executed by the GPU thread + void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu); + + /// Returns true if this is called by the GPU thread + bool IsGpuThread() const { + return 
std::this_thread::get_id() == thread_id; + } + +private: + SynchState state; + std::thread thread; + std::thread::id thread_id; + VideoCore::RendererBase& renderer; + Tegra::DmaPusher& dma_pusher; +}; + +} // namespace VideoCommon::GPUThread diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 321d9dd3d..168288088 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -749,11 +749,7 @@ void RasterizerOpenGL::FlushAll() {} void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - - if (Settings::values.use_accurate_gpu_emulation) { - // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit - res_cache.FlushRegion(addr, size); - } + res_cache.FlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 73b04b749..3b070bfbb 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -20,10 +20,7 @@ EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {} void EmuThread::run() { - if (!Settings::values.use_multi_core) { - // Single core mode must acquire OpenGL context for entire emulation session - render_window->MakeCurrent(); - } + render_window->MakeCurrent(); MicroProfileOnThreadCreate("EmuThread"); @@ -38,6 +35,11 @@ void EmuThread::run() { emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); + if (Settings::values.use_asynchronous_gpu_emulation) { + // Release OpenGL context for the GPU thread + render_window->DoneCurrent(); + } + // holds whether the cpu was running during the last iteration, // so that the DebugModeLeft signal can be emitted before the // next execution step diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index e9546dadf..74dc6bb28 100644 --- 
a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -374,6 +374,8 @@ void Config::ReadValues() { qt_config->value("use_disk_shader_cache", false).toBool(); Settings::values.use_accurate_gpu_emulation = qt_config->value("use_accurate_gpu_emulation", false).toBool(); + Settings::values.use_asynchronous_gpu_emulation = + qt_config->value("use_asynchronous_gpu_emulation", false).toBool(); Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat(); Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat(); @@ -633,6 +635,8 @@ void Config::SaveValues() { qt_config->setValue("frame_limit", Settings::values.frame_limit); qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache); qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation); + qt_config->setValue("use_asynchronous_gpu_emulation", + Settings::values.use_asynchronous_gpu_emulation); // Cast to double because Qt's written float values are not human-readable qt_config->setValue("bg_red", (double)Settings::values.bg_red); diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 0f5dd534b..dd1d67488 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -75,6 +75,8 @@ void ConfigureGraphics::setConfiguration() { ui->frame_limit->setValue(Settings::values.frame_limit); ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); + ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn()); + ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation); UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue)); } @@ -86,6 +88,8 @@ void 
ConfigureGraphics::applyConfiguration() { Settings::values.frame_limit = ui->frame_limit->value(); Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); + Settings::values.use_asynchronous_gpu_emulation = + ui->use_asynchronous_gpu_emulation->isChecked(); Settings::values.bg_red = static_cast<float>(bg_color.redF()); Settings::values.bg_green = static_cast<float>(bg_color.greenF()); Settings::values.bg_blue = static_cast<float>(bg_color.blueF()); diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 824f5810a..c6767e0ca 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -64,6 +64,13 @@ </widget> </item> <item> + <widget class="QCheckBox" name="use_asynchronous_gpu_emulation"> + <property name="text"> + <string>Use asynchronous GPU emulation</string> + </property> + </widget> + </item> + <item> <layout class="QHBoxLayout" name="horizontalLayout"> <item> <widget class="QLabel" name="label"> diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index ff05b3179..ca880dc65 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -354,6 +354,8 @@ void Config::ReadValues() { sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); Settings::values.use_accurate_gpu_emulation = sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); + Settings::values.use_asynchronous_gpu_emulation = + sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0); Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index a81986f8e..6538af098 100644 --- a/src/yuzu_cmd/default_ini.h +++ 
b/src/yuzu_cmd/default_ini.h @@ -118,6 +118,10 @@ use_disk_shader_cache = # 0 (default): Off (fast), 1 : On (slow) use_accurate_gpu_emulation = +# Whether to use asynchronous GPU emulation +# 0 (default): Off (slow), 1 : On (fast) +use_asynchronous_gpu_emulation = + # The clear color for the renderer. What shows up on the sides of the bottom screen. # Must be in range of 0.0-1.0. Defaults to 1.0 for all. bg_red = |