diff options
Diffstat (limited to 'src/video_core')
18 files changed, 79 insertions, 90 deletions
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 8b355cf7b..db507cf04 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -9,7 +9,7 @@ namespace VideoCommon { GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) - : Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {} + : Tegra::GPU(system, renderer), gpu_thread{system, renderer, *dma_pusher} {} GPUAsynch::~GPUAsynch() = default; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index c5dc199c5..23f9bd422 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -4,6 +4,9 @@ #include "common/assert.h" #include "common/microprofile.h" +#include "core/core.h" +#include "core/core_timing.h" +#include "core/core_timing_util.h" #include "core/frontend/scope_acquire_window_context.h" #include "video_core/dma_pusher.h" #include "video_core/gpu.h" @@ -36,7 +39,6 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p dma_pusher.Push(std::move(submit_list->entries)); dma_pusher.DispatchCalls(); } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { - state.DecrementFramesCounter(); renderer.SwapBuffers(std::move(data->framebuffer)); } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { renderer.Rasterizer().FlushRegion(data->addr, data->size); @@ -47,13 +49,18 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p } else { UNREACHABLE(); } + state.signaled_fence = next.fence; + state.TrySynchronize(); } } } -ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) - : renderer{renderer}, thread{RunThread, std::ref(renderer), std::ref(dma_pusher), - std::ref(state)} {} +ThreadManager::ThreadManager(Core::System& system, VideoCore::RendererBase& renderer, + Tegra::DmaPusher& dma_pusher) + : system{system}, thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)} { + synchronization_event = system.CoreTiming().RegisterEvent( + "GPUThreadSynch", [this](u64 fence, int) { state.WaitForSynchronization(fence); }); +} ThreadManager::~ThreadManager() { // Notify GPU thread that a shutdown is pending @@ -62,14 +69,14 @@ ThreadManager::~ThreadManager() { } void ThreadManager::SubmitList(Tegra::CommandList&& entries) { - PushCommand(SubmitListCommand(std::move(entries))); + const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; + const s64 synchronization_ticks{Core::Timing::usToCycles(9000)}; + system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); } void ThreadManager::SwapBuffers( std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { - state.IncrementFramesCounter(); PushCommand(SwapBuffersCommand(std::move(framebuffer))); - state.WaitForFrames(); } void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { @@ -79,7 +86,7 @@ void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { if (state.queue.Empty()) { // It's quicker to invalidate a single region on the CPU if the queue is already empty - renderer.Rasterizer().InvalidateRegion(addr, size); + system.Renderer().Rasterizer().InvalidateRegion(addr, size); } else { PushCommand(InvalidateRegionCommand(addr, size)); } @@ -90,9 +97,25 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { InvalidateRegion(addr, size); } -void ThreadManager::PushCommand(CommandData&& command_data) { - state.queue.Push(CommandDataContainer(std::move(command_data))); +u64 ThreadManager::PushCommand(CommandData&& command_data) { + const u64 fence{++state.last_fence}; + state.queue.Push(CommandDataContainer(std::move(command_data), fence)); state.SignalCommands(); + return fence; +} + +MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); +void SynchState::WaitForSynchronization(u64 fence) { + if (signaled_fence >= fence) { + return; + } + + // Wait for the GPU to be idle (all commands to be executed) + { + MICROPROFILE_SCOPE(GPU_wait); + std::unique_lock<std::mutex> lock{synchronization_mutex}; + synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; }); + } } } // namespace VideoCommon::GPUThread diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 70acb2e79..62bcea5bb 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -19,9 +19,12 @@ struct FramebufferConfig; class DmaPusher; } // namespace Tegra -namespace VideoCore { -class RendererBase; -} // namespace VideoCore +namespace Core { +class System; +namespace Timing { +struct EventType; +} // namespace Timing +} // namespace Core namespace VideoCommon::GPUThread { @@ -75,63 +78,47 @@ using CommandData = struct CommandDataContainer { CommandDataContainer() = default; - CommandDataContainer(CommandData&& data) : data{std::move(data)} {} + CommandDataContainer(CommandData&& data, u64 next_fence) + : data{std::move(data)}, fence{next_fence} {} CommandDataContainer& operator=(const CommandDataContainer& t) { data = std::move(t.data); + fence = t.fence; return *this; } CommandData data; + u64 fence{}; }; /// Struct used to synchronize the GPU thread struct SynchState final { std::atomic_bool is_running{true}; std::atomic_int queued_frame_count{}; - std::mutex frames_mutex; + std::mutex synchronization_mutex; std::mutex commands_mutex; std::condition_variable commands_condition; - std::condition_variable frames_condition; + std::condition_variable synchronization_condition; - void IncrementFramesCounter() { - std::lock_guard lock{frames_mutex}; - ++queued_frame_count; + /// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU + /// synchronized. This is entirely empirical. + bool IsSynchronized() const { + constexpr std::size_t max_queue_gap{5}; + return queue.Size() <= max_queue_gap; } - void DecrementFramesCounter() { - { - std::lock_guard lock{frames_mutex}; - --queued_frame_count; - - if (queued_frame_count) { - return; - } + void TrySynchronize() { + if (IsSynchronized()) { + std::lock_guard<std::mutex> lock{synchronization_mutex}; + synchronization_condition.notify_one(); } - frames_condition.notify_one(); } - void WaitForFrames() { - { - std::lock_guard lock{frames_mutex}; - if (!queued_frame_count) { - return; - } - } - - // Wait for the GPU to be idle (all commands to be executed) - { - std::unique_lock lock{frames_mutex}; - frames_condition.wait(lock, [this] { return !queued_frame_count; }); - } - } + void WaitForSynchronization(u64 fence); void SignalCommands() { - { - std::unique_lock lock{commands_mutex}; - if (queue.Empty()) { - return; - } + if (queue.Empty()) { + return; } commands_condition.notify_one(); @@ -144,12 +131,15 @@ struct SynchState final { using CommandQueue = Common::SPSCQueue<CommandDataContainer>; CommandQueue queue; + u64 last_fence{}; + std::atomic<u64> signaled_fence{}; }; /// Class used to manage the GPU thread class ThreadManager final { public: - explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher); + explicit ThreadManager(Core::System& system, VideoCore::RendererBase& renderer, + Tegra::DmaPusher& dma_pusher); ~ThreadManager(); /// Push GPU command entries to be processed @@ -170,11 +160,12 @@ public: private: /// Pushes a command to be executed by the GPU thread - void PushCommand(CommandData&& command_data); + u64 PushCommand(CommandData&& command_data); private: SynchState state; - VideoCore::RendererBase& renderer; + Core::System& system; + Core::Timing::EventType* synchronization_event{}; std::thread thread; std::thread::id thread_id; }; diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index 64f75db43..524d9ea5a 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp @@ -223,27 +223,21 @@ void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 res } u32 MacroInterpreter::FetchParameter() { - ASSERT(next_parameter_index < parameters.size()); - return parameters[next_parameter_index++]; + return parameters.at(next_parameter_index++); } u32 MacroInterpreter::GetRegister(u32 register_id) const { - // Register 0 is supposed to always return 0. - if (register_id == 0) - return 0; - - ASSERT(register_id < registers.size()); - return registers[register_id]; + return registers.at(register_id); } void MacroInterpreter::SetRegister(u32 register_id, u32 value) { - // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero - // register. - if (register_id == 0) + // Register 0 is hardwired as the zero register. + // Ensure no writes to it actually occur. + if (register_id == 0) { return; + } - ASSERT(register_id < registers.size()); - registers[register_id] = value; + registers.at(register_id) = value; } void MacroInterpreter::SetMethodAddress(u32 address) { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index fd091c84c..7989ec11b 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -7,7 +7,6 @@ #include "common/alignment.h" #include "core/core.h" -#include "core/memory.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp index da9326253..5842d6213 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ b/src/video_core/renderer_opengl/gl_global_cache.cpp @@ -4,7 +4,6 @@ #include <glad/glad.h> -#include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" #include "video_core/renderer_opengl/gl_global_cache.h" diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp index 2bcbd3da2..c3e94d917 100644 --- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp +++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp @@ -7,7 +7,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "core/core.h" -#include "core/memory.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_primitive_assembler.h" diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h index 0e2e7dc36..4e87ce4d6 100644 --- a/src/video_core/renderer_opengl/gl_primitive_assembler.h +++ b/src/video_core/renderer_opengl/gl_primitive_assembler.h @@ -4,11 +4,9 @@ #pragma once -#include <vector> #include <glad/glad.h> #include "common/common_types.h" -#include "video_core/memory_manager.h" namespace OpenGL { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8f012db62..7ff1e6737 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -17,7 +17,6 @@ #include "common/microprofile.h" #include "common/scope_exit.h" #include "core/core.h" -#include "core/frontend/emu_window.h" #include "core/hle/kernel/process.h" #include "core/settings.h" #include "video_core/engines/maxwell_3d.h" @@ -26,7 +25,6 @@ #include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" -#include "video_core/video_core.h" namespace OpenGL { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 4de565321..54fbf48aa 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -12,15 +12,12 @@ #include <optional> #include <tuple> #include <utility> -#include <vector> #include <boost/icl/interval_map.hpp> -#include <boost/range/iterator_range.hpp> #include <glad/glad.h> #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/memory_manager.h" #include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" @@ -29,10 +26,8 @@ #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" -#include "video_core/renderer_opengl/gl_stream_buffer.h" namespace Core { class System; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index aba6ce731..7a3280620 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -13,7 +13,6 @@ #include "common/scope_exit.h" #include "core/core.h" #include "core/hle/kernel/process.h" -#include "core/memory.h" #include "core/settings.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/morton.h" diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index e8073579f..ad4fd3ad2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -5,10 +5,9 @@ #pragma once #include <array> -#include <map> #include <memory> #include <string> -#include <unordered_set> +#include <tuple> #include <vector> #include "common/alignment.h" diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index fd1c85115..0cf8e0b3d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -5,21 +5,20 @@ #pragma once #include <array> +#include <atomic> #include <memory> #include <set> #include <tuple> #include <unordered_map> +#include <vector> #include <glad/glad.h> -#include "common/assert.h" #include "common/common_types.h" #include "video_core/rasterizer_cache.h" -#include "video_core/renderer_base.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" -#include "video_core/renderer_opengl/gl_shader_gen.h" namespace Core { class System; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 26162e56f..3222028d5 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1196,11 +1196,12 @@ private: switch (meta->element) { case 0: case 1: - return "textureSize(" + sampler + ", " + lod + ')' + GetSwizzle(meta->element); + return "itof(int(textureSize(" + sampler + ", " + lod + ')' + + GetSwizzle(meta->element) + "))"; case 2: return "0"; case 3: - return "textureQueryLevels(" + sampler + ')'; + return "itof(textureQueryLevels(" + sampler + "))"; } UNREACHABLE(); return "0"; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 72aca4938..4e04ab2f8 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -5,7 +5,6 @@ #pragma once #include <array> -#include <set> #include <string> #include <utility> #include <vector> diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 7d96649af..8763d9c71 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. #include <fmt/format.h> -#include "common/assert.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_gen.h" diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index fba8e681b..fad346b48 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -4,12 +4,9 @@ #pragma once -#include <array> -#include <string> #include <vector> #include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/shader/shader_ir.h" diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index a01efeb05..d69cba9c3 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -5,7 +5,6 @@ #include <algorithm> #include <cstddef> #include <cstdlib> -#include <cstring> #include <memory> #include <glad/glad.h> #include "common/assert.h" |