diff options
Diffstat (limited to 'src/video_core')
40 files changed, 534 insertions, 242 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 14b76680f..242a0d1cd 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -128,7 +128,9 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_scheduler.cpp renderer_vulkan/vk_scheduler.h renderer_vulkan/vk_stream_buffer.cpp - renderer_vulkan/vk_stream_buffer.h) + renderer_vulkan/vk_stream_buffer.h + renderer_vulkan/vk_swapchain.cpp + renderer_vulkan/vk_swapchain.h) target_include_directories(video_core PRIVATE ../../externals/Vulkan-Headers/include) target_compile_definitions(video_core PRIVATE HAS_VULKAN) @@ -137,4 +139,4 @@ endif() create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) -target_link_libraries(video_core PRIVATE glad lz4_static) +target_link_libraries(video_core PRIVATE glad) diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 5ffb492ea..f0ef67535 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -10,7 +10,7 @@ namespace Tegra { void DebugContext::DoOnEvent(Event event, void* data) { { - std::unique_lock<std::mutex> lock(breakpoint_mutex); + std::unique_lock lock{breakpoint_mutex}; // TODO(Subv): Commit the rasterizer's caches so framebuffers, render targets, etc. will // show on debug widgets @@ -32,7 +32,7 @@ void DebugContext::DoOnEvent(Event event, void* data) { void DebugContext::Resume() { { - std::lock_guard<std::mutex> lock(breakpoint_mutex); + std::lock_guard lock{breakpoint_mutex}; // Tell all observers that we are about to resume for (auto& breakpoint_observer : breakpoint_observers) { diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index c235faf46..ac3a2eb01 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -40,7 +40,7 @@ public: /// Constructs the object such that it observes events of the given DebugContext. explicit BreakPointObserver(std::shared_ptr<DebugContext> debug_context) : context_weak(debug_context) { - std::unique_lock<std::mutex> lock(debug_context->breakpoint_mutex); + std::unique_lock lock{debug_context->breakpoint_mutex}; debug_context->breakpoint_observers.push_back(this); } @@ -48,7 +48,7 @@ public: auto context = context_weak.lock(); if (context) { { - std::unique_lock<std::mutex> lock(context->breakpoint_mutex); + std::unique_lock lock{context->breakpoint_mutex}; context->breakpoint_observers.remove(this); } diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 03b7ee5d8..55966eef1 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -6,12 +6,13 @@ #include "common/logging/log.h" #include "common/math_util.h" #include "video_core/engines/fermi_2d.h" +#include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" namespace Tegra::Engines { Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) - : memory_manager(memory_manager), rasterizer{rasterizer} {} + : rasterizer{rasterizer}, memory_manager{memory_manager} {} void Fermi2D::CallMethod(const GPU::MethodCall& method_call) { ASSERT_MSG(method_call.method < Regs::NUM_REGS, diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 80523e320..2e51b7f13 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -10,7 +10,10 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/gpu.h" -#include "video_core/memory_manager.h" + +namespace Tegra { +class MemoryManager; +} namespace VideoCore { class RasterizerInterface; @@ -115,10 +118,9 @@ public: }; } regs{}; - MemoryManager& memory_manager; - private: VideoCore::RasterizerInterface& rasterizer; + MemoryManager& memory_manager; /// Performs the copy from the source surface to the destination surface as configured in the /// registers. diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 6575afd0f..fb6cdf432 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -9,7 +9,10 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/gpu.h" -#include "video_core/memory_manager.h" + +namespace Tegra { +class MemoryManager; +} namespace Tegra::Engines { @@ -40,10 +43,11 @@ public: static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "KeplerCompute Regs has wrong size"); - MemoryManager& memory_manager; - /// Write the value to the register identified by method. void CallMethod(const GPU::MethodCall& method_call); + +private: + MemoryManager& memory_manager; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index e259bf46b..cd51a31d7 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -5,9 +5,9 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" -#include "core/memory.h" #include "video_core/engines/kepler_memory.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" @@ -15,7 +15,7 @@ namespace Tegra::Engines { KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) - : system{system}, memory_manager(memory_manager), rasterizer{rasterizer} {} + : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {} KeplerMemory::~KeplerMemory() = default; diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h index 9181e9d80..78b6c3e45 100644 --- a/src/video_core/engines/kepler_memory.h +++ b/src/video_core/engines/kepler_memory.h @@ -10,12 +10,15 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/gpu.h" -#include "video_core/memory_manager.h" namespace Core { class System; } +namespace Tegra { +class MemoryManager; +} + namespace VideoCore { class RasterizerInterface; } @@ -82,8 +85,8 @@ public: private: Core::System& system; - MemoryManager& memory_manager; VideoCore::RasterizerInterface& rasterizer; + MemoryManager& memory_manager; void ProcessData(u32 data); }; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 7713e10e2..8194a4b4a 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -7,11 +7,10 @@ #include "common/assert.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/memory.h" #include "video_core/debug_utils/debug_utils.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" -#include "video_core/renderer_base.h" #include "video_core/textures/texture.h" namespace Tegra::Engines { @@ -21,8 +20,8 @@ constexpr u32 MacroRegistersStart = 0xE00; Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) - : memory_manager(memory_manager), system{system}, rasterizer{rasterizer}, - macro_interpreter(*this) { + : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, macro_interpreter{ + *this} { InitializeRegisterDefaults(); } diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index f7ecee254..321af3297 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -16,13 +16,16 @@ #include "common/math_util.h" #include "video_core/gpu.h" #include "video_core/macro_interpreter.h" -#include "video_core/memory_manager.h" #include "video_core/textures/texture.h" namespace Core { class System; } +namespace Tegra { +class MemoryManager; +} + namespace VideoCore { class RasterizerInterface; } @@ -1103,7 +1106,6 @@ public: }; State state{}; - MemoryManager& memory_manager; struct DirtyFlags { std::bitset<8> color_buffer{0xFF}; @@ -1151,6 +1153,8 @@ private: VideoCore::RasterizerInterface& rasterizer; + MemoryManager& memory_manager; + /// Start offsets of each macro in macro_memory std::unordered_map<u32, u32> macro_offsets; diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 5cca5c29a..2426d0067 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -5,9 +5,9 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" -#include "core/memory.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_dma.h" +#include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" #include "video_core/textures/decoders.h" @@ -16,7 +16,7 @@ namespace Tegra::Engines { MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager) - : memory_manager(memory_manager), system{system}, rasterizer{rasterizer} {} + : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {} void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) { ASSERT_MSG(method_call.method < Regs::NUM_REGS, diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 34c369320..c6b649842 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -10,12 +10,15 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "video_core/gpu.h" -#include "video_core/memory_manager.h" namespace Core { class System; } +namespace Tegra { +class MemoryManager; +} + namespace VideoCore { class RasterizerInterface; } @@ -139,13 +142,13 @@ public: }; } regs{}; - MemoryManager& memory_manager; - private: Core::System& system; VideoCore::RasterizerInterface& rasterizer; + MemoryManager& memory_manager; + /// Performs the copy from the source buffer to the destination buffer as configured in the /// registers. void HandleCopy(); diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 8b355cf7b..db507cf04 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -9,7 +9,7 @@ namespace VideoCommon { GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) - : Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {} + : Tegra::GPU(system, renderer), gpu_thread{system, renderer, *dma_pusher} {} GPUAsynch::~GPUAsynch() = default; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 086b2f625..cc56cf467 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -4,6 +4,9 @@ #include "common/assert.h" #include "common/microprofile.h" +#include "core/core.h" +#include "core/core_timing.h" +#include "core/core_timing_util.h" #include "core/frontend/scope_acquire_window_context.h" #include "video_core/dma_pusher.h" #include "video_core/gpu.h" @@ -36,7 +39,6 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p dma_pusher.Push(std::move(submit_list->entries)); dma_pusher.DispatchCalls(); } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { - state.DecrementFramesCounter(); renderer.SwapBuffers(std::move(data->framebuffer)); } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { renderer.Rasterizer().FlushRegion(data->addr, data->size); @@ -47,13 +49,18 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p } else { UNREACHABLE(); } + state.signaled_fence = next.fence; + state.TrySynchronize(); } } } -ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) - : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer), - std::ref(dma_pusher), std::ref(state)} {} +ThreadManager::ThreadManager(Core::System& system, VideoCore::RendererBase& renderer, + Tegra::DmaPusher& dma_pusher) + : system{system}, thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)} { + synchronization_event = system.CoreTiming().RegisterEvent( + "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); }); +} ThreadManager::~ThreadManager() { // Notify GPU thread that a shutdown is pending @@ -62,14 +69,14 @@ ThreadManager::~ThreadManager() { } void ThreadManager::SubmitList(Tegra::CommandList&& entries) { - PushCommand(SubmitListCommand(std::move(entries))); + const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; + const s64 synchronization_ticks{Core::Timing::usToCycles(9000)}; + system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); } void ThreadManager::SwapBuffers( std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { - state.IncrementFramesCounter(); PushCommand(SwapBuffersCommand(std::move(framebuffer))); - state.WaitForFrames(); } void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { @@ -79,7 +86,7 @@ void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { if (state.queue.Empty()) { // It's quicker to invalidate a single region on the CPU if the queue is already empty - renderer.Rasterizer().InvalidateRegion(addr, size); + system.Renderer().Rasterizer().InvalidateRegion(addr, size); } else { PushCommand(InvalidateRegionCommand(addr, size)); } @@ -90,9 +97,25 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { InvalidateRegion(addr, size); } -void ThreadManager::PushCommand(CommandData&& command_data) { - state.queue.Push(CommandDataContainer(std::move(command_data))); +u64 ThreadManager::PushCommand(CommandData&& command_data) { + const u64 fence{++state.last_fence}; + state.queue.Push(CommandDataContainer(std::move(command_data), fence)); state.SignalCommands(); + return fence; +} + +MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); +void SynchState::WaitForSynchronization(u64 fence) { + if (signaled_fence >= fence) { + return; + } + + // Wait for the GPU to be idle (all commands to be executed) + { + MICROPROFILE_SCOPE(GPU_wait); + std::unique_lock<std::mutex> lock{synchronization_mutex}; + synchronization_condition.wait(lock, [this, fence] { return signaled_fence >= fence; }); + } } } // namespace VideoCommon::GPUThread diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 8cd7db1c6..62bcea5bb 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -4,10 +4,8 @@ #pragma once -#include <array> #include <atomic> #include <condition_variable> -#include <memory> #include <mutex> #include <optional> #include <thread> @@ -21,9 +19,12 @@ struct FramebufferConfig; class DmaPusher; } // namespace Tegra -namespace VideoCore { -class RendererBase; -} // namespace VideoCore +namespace Core { +class System; +namespace Timing { +struct EventType; +} // namespace Timing +} // namespace Core namespace VideoCommon::GPUThread { @@ -77,81 +78,68 @@ using CommandData = struct CommandDataContainer { CommandDataContainer() = default; - CommandDataContainer(CommandData&& data) : data{std::move(data)} {} + CommandDataContainer(CommandData&& data, u64 next_fence) + : data{std::move(data)}, fence{next_fence} {} CommandDataContainer& operator=(const CommandDataContainer& t) { data = std::move(t.data); + fence = t.fence; return *this; } CommandData data; + u64 fence{}; }; /// Struct used to synchronize the GPU thread struct SynchState final { std::atomic_bool is_running{true}; std::atomic_int queued_frame_count{}; - std::mutex frames_mutex; + std::mutex synchronization_mutex; std::mutex commands_mutex; std::condition_variable commands_condition; - std::condition_variable frames_condition; + std::condition_variable synchronization_condition; - void IncrementFramesCounter() { - std::lock_guard<std::mutex> lock{frames_mutex}; - ++queued_frame_count; + /// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU + /// synchronized. This is entirely empirical. + bool IsSynchronized() const { + constexpr std::size_t max_queue_gap{5}; + return queue.Size() <= max_queue_gap; } - void DecrementFramesCounter() { - { - std::lock_guard<std::mutex> lock{frames_mutex}; - --queued_frame_count; - - if (queued_frame_count) { - return; - } + void TrySynchronize() { + if (IsSynchronized()) { + std::lock_guard<std::mutex> lock{synchronization_mutex}; + synchronization_condition.notify_one(); } - frames_condition.notify_one(); } - void WaitForFrames() { - { - std::lock_guard<std::mutex> lock{frames_mutex}; - if (!queued_frame_count) { - return; - } - } - - // Wait for the GPU to be idle (all commands to be executed) - { - std::unique_lock<std::mutex> lock{frames_mutex}; - frames_condition.wait(lock, [this] { return !queued_frame_count; }); - } - } + void WaitForSynchronization(u64 fence); void SignalCommands() { - { - std::unique_lock<std::mutex> lock{commands_mutex}; - if (queue.Empty()) { - return; - } + if (queue.Empty()) { + return; } commands_condition.notify_one(); } void WaitForCommands() { - std::unique_lock<std::mutex> lock{commands_mutex}; + std::unique_lock lock{commands_mutex}; commands_condition.wait(lock, [this] { return !queue.Empty(); }); } using CommandQueue = Common::SPSCQueue<CommandDataContainer>; CommandQueue queue; + u64 last_fence{}; + std::atomic<u64> signaled_fence{}; }; /// Class used to manage the GPU thread class ThreadManager final { public: - explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher); + explicit ThreadManager(Core::System& system, VideoCore::RendererBase& renderer, + Tegra::DmaPusher& dma_pusher); ~ThreadManager(); /// Push GPU command entries to be processed @@ -172,12 +160,12 @@ public: private: /// Pushes a command to be executed by the GPU thread - void PushCommand(CommandData&& command_data); + u64 PushCommand(CommandData&& command_data); private: SynchState state; - VideoCore::RendererBase& renderer; - Tegra::DmaPusher& dma_pusher; + Core::System& system; + Core::Timing::EventType* synchronization_event{}; std::thread thread; std::thread::id thread_id; }; diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp index 64f75db43..524d9ea5a 100644 --- a/src/video_core/macro_interpreter.cpp +++ b/src/video_core/macro_interpreter.cpp @@ -223,27 +223,21 @@ void MacroInterpreter::ProcessResult(ResultOperation operation, u32 reg, u32 res } u32 MacroInterpreter::FetchParameter() { - ASSERT(next_parameter_index < parameters.size()); - return parameters[next_parameter_index++]; + return parameters.at(next_parameter_index++); } u32 MacroInterpreter::GetRegister(u32 register_id) const { - // Register 0 is supposed to always return 0. - if (register_id == 0) - return 0; - - ASSERT(register_id < registers.size()); - return registers[register_id]; + return registers.at(register_id); } void MacroInterpreter::SetRegister(u32 register_id, u32 value) { - // Register 0 is supposed to always return 0. NOP is implemented as a store to the zero - // register. - if (register_id == 0) + // Register 0 is hardwired as the zero register. + // Ensure no writes to it actually occur. + if (register_id == 0) { return; + } - ASSERT(register_id < registers.size()); - registers[register_id] = value; + registers.at(register_id) = value; } void MacroInterpreter::SetMethodAddress(u32 address) { diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index e76b59842..8417324ff 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -77,16 +77,17 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { return gpu_addr; } -GPUVAddr MemoryManager::FindFreeRegion(GPUVAddr region_start, u64 size) { +GPUVAddr MemoryManager::FindFreeRegion(GPUVAddr region_start, u64 size) const { // Find the first Free VMA. - const VMAHandle vma_handle{std::find_if(vma_map.begin(), vma_map.end(), [&](const auto& vma) { - if (vma.second.type != VirtualMemoryArea::Type::Unmapped) { - return false; - } + const VMAHandle vma_handle{ + std::find_if(vma_map.begin(), vma_map.end(), [region_start, size](const auto& vma) { + if (vma.second.type != VirtualMemoryArea::Type::Unmapped) { + return false; + } - const VAddr vma_end{vma.second.base + vma.second.size}; - return vma_end > region_start && vma_end >= region_start + size; - })}; + const VAddr vma_end{vma.second.base + vma.second.size}; + return vma_end > region_start && vma_end >= region_start + size; + })}; if (vma_handle == vma_map.end()) { return {}; @@ -99,12 +100,12 @@ bool MemoryManager::IsAddressValid(GPUVAddr addr) const { return (addr >> page_bits) < page_table.pointers.size(); } -std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) { +std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) const { if (!IsAddressValid(addr)) { return {}; } - VAddr cpu_addr{page_table.backing_addr[addr >> page_bits]}; + const VAddr cpu_addr{page_table.backing_addr[addr >> page_bits]}; if (cpu_addr) { return cpu_addr + (addr & page_mask); } @@ -113,7 +114,7 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr) { } template <typename T> -T MemoryManager::Read(GPUVAddr addr) { +T MemoryManager::Read(GPUVAddr addr) const { if (!IsAddressValid(addr)) { return {}; } @@ -165,10 +166,10 @@ void MemoryManager::Write(GPUVAddr addr, T data) { } } -template u8 MemoryManager::Read<u8>(GPUVAddr addr); -template u16 MemoryManager::Read<u16>(GPUVAddr addr); -template u32 MemoryManager::Read<u32>(GPUVAddr addr); -template u64 MemoryManager::Read<u64>(GPUVAddr addr); +template u8 MemoryManager::Read<u8>(GPUVAddr addr) const; +template u16 MemoryManager::Read<u16>(GPUVAddr addr) const; +template u32 MemoryManager::Read<u32>(GPUVAddr addr) const; +template u64 MemoryManager::Read<u64>(GPUVAddr addr) const; template void MemoryManager::Write<u8>(GPUVAddr addr, u8 data); template void MemoryManager::Write<u16>(GPUVAddr addr, u16 data); template void MemoryManager::Write<u32>(GPUVAddr addr, u32 data); @@ -179,8 +180,22 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) { return {}; } - u8* page_pointer{page_table.pointers[addr >> page_bits]}; - if (page_pointer) { + u8* const page_pointer{page_table.pointers[addr >> page_bits]}; + if (page_pointer != nullptr) { + return page_pointer + (addr & page_mask); + } + + LOG_ERROR(HW_GPU, "Unknown GetPointer @ 0x{:016X}", addr); + return {}; +} + +const u8* MemoryManager::GetPointer(GPUVAddr addr) const { + if (!IsAddressValid(addr)) { + return {}; + } + + const u8* const page_pointer{page_table.pointers[addr >> page_bits]}; + if (page_pointer != nullptr) { return page_pointer + (addr & page_mask); } @@ -188,7 +203,7 @@ u8* MemoryManager::GetPointer(GPUVAddr addr) { return {}; } -void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) { +void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const { std::memcpy(dest_buffer, GetPointer(src_addr), size); } void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size) { diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 34744bb27..178e2f655 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -50,17 +50,18 @@ public: GPUVAddr MapBufferEx(VAddr cpu_addr, u64 size); GPUVAddr MapBufferEx(VAddr cpu_addr, GPUVAddr addr, u64 size); GPUVAddr UnmapBuffer(GPUVAddr addr, u64 size); - std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr); + std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; template <typename T> - T Read(GPUVAddr addr); + T Read(GPUVAddr addr) const; template <typename T> void Write(GPUVAddr addr, T data); u8* GetPointer(GPUVAddr addr); + const u8* GetPointer(GPUVAddr addr) const; - void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size); + void ReadBlock(GPUVAddr src_addr, void* dest_buffer, std::size_t size) const; void WriteBlock(GPUVAddr dest_addr, const void* src_buffer, std::size_t size); void CopyBlock(GPUVAddr dest_addr, GPUVAddr src_addr, std::size_t size); @@ -127,7 +128,7 @@ private: void UpdatePageTableForVMA(const VirtualMemoryArea& vma); /// Finds a free (unmapped region) of the specified size starting at the specified address. - GPUVAddr FindFreeRegion(GPUVAddr region_start, u64 size); + GPUVAddr FindFreeRegion(GPUVAddr region_start, u64 size) const; private: static constexpr u64 page_bits{16}; diff --git a/src/video_core/rasterizer_cache.h b/src/video_core/rasterizer_cache.h index 9fc9f3056..291772186 100644 --- a/src/video_core/rasterizer_cache.h +++ b/src/video_core/rasterizer_cache.h @@ -71,8 +71,8 @@ private: bool is_registered{}; ///< Whether the object is currently registered with the cache bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory) u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing - CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space const u8* host_ptr{}; ///< Pointer to the memory backing this cached region + CacheAddr cache_addr{}; ///< Cache address memory, unique from emulated virtual address space }; template <class T> @@ -84,7 +84,7 @@ public: /// Write any cached resources overlapping the specified region back to memory void FlushRegion(CacheAddr addr, std::size_t size) { - std::lock_guard<std::recursive_mutex> lock{mutex}; + std::lock_guard lock{mutex}; const auto& objects{GetSortedObjectsFromRegion(addr, size)}; for (auto& object : objects) { @@ -94,7 +94,7 @@ public: /// Mark the specified region as being invalidated void InvalidateRegion(CacheAddr addr, u64 size) { - std::lock_guard<std::recursive_mutex> lock{mutex}; + std::lock_guard lock{mutex}; const auto& objects{GetSortedObjectsFromRegion(addr, size)}; for (auto& object : objects) { @@ -108,7 +108,7 @@ public: /// Invalidates everything in the cache void InvalidateAll() { - std::lock_guard<std::recursive_mutex> lock{mutex}; + std::lock_guard lock{mutex}; while (interval_cache.begin() != interval_cache.end()) { Unregister(*interval_cache.begin()->second.begin()); @@ -133,7 +133,7 @@ protected: /// Register an object into the cache virtual void Register(const T& object) { - std::lock_guard<std::recursive_mutex> lock{mutex}; + std::lock_guard lock{mutex}; object->SetIsRegistered(true); interval_cache.add({GetInterval(object), ObjectSet{object}}); @@ -143,7 +143,7 @@ protected: /// Unregisters an object from the cache virtual void Unregister(const T& object) { - std::lock_guard<std::recursive_mutex> lock{mutex}; + std::lock_guard lock{mutex}; object->SetIsRegistered(false); rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); @@ -153,14 +153,14 @@ protected: /// Returns a ticks counter used for tracking when cached objects were last modified u64 GetModifiedTicks() { - std::lock_guard<std::recursive_mutex> lock{mutex}; + std::lock_guard lock{mutex}; return ++modified_ticks; } /// Flushes the specified object, updating appropriate cache state as needed void FlushObject(const T& object) { - std::lock_guard<std::recursive_mutex> lock{mutex}; + std::lock_guard lock{mutex}; if (!object->IsDirty()) { return; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index f75c65825..7989ec11b 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -7,7 +7,6 @@ #include "common/alignment.h" #include "core/core.h" -#include "core/memory.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" @@ -15,8 +14,8 @@ namespace OpenGL { CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, std::size_t alignment, u8* host_ptr) - : cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{ - host_ptr} {} + : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset}, + alignment{alignment} {} OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) : RasterizerCache{rasterizer}, stream_buffer(size, true) {} diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp index 0fbfbad55..5842d6213 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ b/src/video_core/renderer_opengl/gl_global_cache.cpp @@ -4,7 +4,6 @@ #include <glad/glad.h> -#include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" #include "video_core/renderer_opengl/gl_global_cache.h" @@ -15,7 +14,7 @@ namespace OpenGL { CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr) - : cpu_addr{cpu_addr}, size{size}, RasterizerCacheObject{host_ptr} { + : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size} { buffer.Create(); // Bind and unbind the buffer so it gets allocated by the driver glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp index 2bcbd3da2..c3e94d917 100644 --- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp +++ b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp @@ -7,7 +7,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "core/core.h" -#include "core/memory.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_primitive_assembler.h" diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h index 0e2e7dc36..4e87ce4d6 100644 --- a/src/video_core/renderer_opengl/gl_primitive_assembler.h +++ b/src/video_core/renderer_opengl/gl_primitive_assembler.h @@ -4,11 +4,9 @@ #pragma once -#include <vector> #include <glad/glad.h> #include "common/common_types.h" -#include "video_core/memory_manager.h" namespace OpenGL { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e06dfe43f..7ff1e6737 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -17,7 +17,6 @@ #include "common/microprofile.h" #include "common/scope_exit.h" #include "core/core.h" -#include "core/frontend/emu_window.h" #include "core/hle/kernel/process.h" #include "core/settings.h" #include "video_core/engines/maxwell_3d.h" @@ -26,7 +25,6 @@ #include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" -#include "video_core/video_core.h" namespace OpenGL { @@ -100,11 +98,9 @@ struct FramebufferCacheKey { } }; -RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system, - ScreenInfo& info) - : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, - emu_window{window}, system{system}, screen_info{info}, - buffer_cache(*this, STREAM_BUFFER_SIZE) { +RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info) + : res_cache{*this}, shader_cache{*this, system}, global_cache{*this}, system{system}, + screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { // Create sampler objects for (std::size_t i = 0; i < texture_samplers.size(); ++i) { texture_samplers[i].Create(); @@ -320,7 +316,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 GLShader::MaxwellUniformData ubo{}; - ubo.SetFromRegs(gpu.state.shader_stages[stage]); + ubo.SetFromRegs(gpu, stage); const GLintptr offset = buffer_cache.UploadHostMemory( &ubo, sizeof(ubo), static_cast<std::size_t>(uniform_buffer_alignment)); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 30f3e8acb..54fbf48aa 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -12,15 +12,12 @@ #include <optional> #include <tuple> #include <utility> -#include <vector> #include <boost/icl/interval_map.hpp> -#include <boost/range/iterator_range.hpp> #include <glad/glad.h> #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" -#include "video_core/memory_manager.h" #include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" @@ -29,10 +26,8 @@ #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" -#include "video_core/renderer_opengl/gl_stream_buffer.h" namespace Core { class System; @@ -50,8 +45,7 @@ struct FramebufferCacheKey; class RasterizerOpenGL : public VideoCore::RasterizerInterface { public: - explicit RasterizerOpenGL(Core::Frontend::EmuWindow& window, Core::System& system, - ScreenInfo& info); + explicit RasterizerOpenGL(Core::System& system, ScreenInfo& info); ~RasterizerOpenGL() override; void DrawArrays() override; @@ -214,7 +208,6 @@ private: ShaderCacheOpenGL shader_cache; GlobalRegionCacheOpenGL global_cache; - Core::Frontend::EmuWindow& emu_window; Core::System& system; ScreenInfo& screen_info; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 0235317c0..7a3280620 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -13,7 +13,6 @@ #include "common/scope_exit.h" #include "core/core.h" #include "core/hle/kernel/process.h" -#include "core/memory.h" #include "core/settings.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/morton.h" @@ -562,8 +561,8 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac } CachedSurface::CachedSurface(const SurfaceParams& params) - : params{params}, gl_target{SurfaceTargetToGL(params.target)}, - cached_size_in_bytes{params.size_in_bytes}, RasterizerCacheObject{params.host_ptr} { + : RasterizerCacheObject{params.host_ptr}, params{params}, + gl_target{SurfaceTargetToGL(params.target)}, cached_size_in_bytes{params.size_in_bytes} { const auto optional_cpu_addr{ Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(params.gpu_addr)}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h index e8073579f..ad4fd3ad2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h @@ -5,10 +5,9 @@ #pragma once #include <array> -#include <map> #include <memory> #include <string> -#include <unordered_set> +#include <tuple> #include <vector> #include "common/alignment.h" diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 1f8eca6f0..7030db365 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -6,13 +6,11 @@ #include "common/assert.h" #include "common/hash.h" #include "core/core.h" -#include "core/memory.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/utils.h" #include "video_core/shader/shader_ir.h" @@ -215,9 +213,9 @@ CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, const PrecompiledPrograms& precompiled_programs, ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr) - : host_ptr{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, - program_type{program_type}, disk_cache{disk_cache}, - precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} { + : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr}, + unique_identifier{unique_identifier}, program_type{program_type}, disk_cache{disk_cache}, + precompiled_programs{precompiled_programs} { const std::size_t code_size = CalculateProgramSize(program_code); const std::size_t code_size_b = @@ -245,9 +243,9 @@ CachedShader::CachedShader(VAddr cpu_addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, const PrecompiledPrograms& precompiled_programs, GLShader::ProgramResult result, u8* host_ptr) - : cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, program_type{program_type}, - disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{ - host_ptr} { + : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, unique_identifier{unique_identifier}, + program_type{program_type}, disk_cache{disk_cache}, precompiled_programs{ + precompiled_programs} { code = std::move(result.first); entries = result.second; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index fd1c85115..0cf8e0b3d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -5,21 +5,20 @@ #pragma once #include <array> +#include <atomic> #include <memory> #include <set> #include <tuple> #include <unordered_map> +#include <vector> #include <glad/glad.h> -#include "common/assert.h" #include "common/common_types.h" #include "video_core/rasterizer_cache.h" -#include "video_core/renderer_base.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" -#include "video_core/renderer_opengl/gl_shader_gen.h" namespace Core { class System; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 11d1169f0..a1a51f226 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -69,10 +69,10 @@ public: shader_source += '\n'; } - std::string GenerateTemporal() { - std::string temporal = "tmp"; - temporal += std::to_string(temporal_index++); - return temporal; + std::string GenerateTemporary() { + std::string temporary = "tmp"; + temporary += std::to_string(temporary_index++); + return temporary; } std::string GetResult() { @@ -87,7 +87,7 @@ private: } std::string shader_source; - u32 temporal_index = 1; + u32 temporary_index = 1; }; /// Generates code to use for a swizzle operation. @@ -426,9 +426,14 @@ private: std::string Visit(Node node) { if (const auto operation = std::get_if<OperationNode>(node)) { const auto operation_index = static_cast<std::size_t>(operation->GetCode()); + if (operation_index >= operation_decompilers.size()) { + UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); + return {}; + } const auto decompiler = operation_decompilers[operation_index]; if (decompiler == nullptr) { - UNREACHABLE_MSG("Operation decompiler {} not defined", operation_index); + UNREACHABLE_MSG("Undefined operation: {}", operation_index); + return {}; } return (this->*decompiler)(*operation); @@ -540,7 +545,7 @@ private: } else if (std::holds_alternative<OperationNode>(*offset)) { // Indirect access - const std::string final_offset = code.GenerateTemporal(); + const std::string final_offset = code.GenerateTemporary(); code.AddLine("uint " + final_offset + " = (ftou(" + Visit(offset) + ") / 4) & " + std::to_string(MAX_CONSTBUFFER_ELEMENTS - 1) + ';'); return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()), @@ -587,9 +592,9 @@ private: // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders const std::string precise = stage != ShaderStage::Fragment ? "precise " : ""; - const std::string temporal = code.GenerateTemporal(); - code.AddLine(precise + "float " + temporal + " = " + value + ';'); - return temporal; + const std::string temporary = code.GenerateTemporary(); + code.AddLine(precise + "float " + temporary + " = " + value + ';'); + return temporary; } std::string VisitOperand(Operation operation, std::size_t operand_index) { @@ -601,9 +606,9 @@ private: return Visit(operand); } - const std::string temporal = code.GenerateTemporal(); - code.AddLine("float " + temporal + " = " + Visit(operand) + ';'); - return temporal; + const std::string temporary = code.GenerateTemporary(); + code.AddLine("float " + temporary + " = " + Visit(operand) + ';'); + return temporary; } std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) { @@ -1196,11 +1201,12 @@ private: switch (meta->element) { case 0: case 1: - return "textureSize(" + sampler + ", " + lod + ')' + GetSwizzle(meta->element); + return "itof(int(textureSize(" + sampler + ", " + lod + ')' + + GetSwizzle(meta->element) + "))"; case 2: return "0"; case 3: - return "textureQueryLevels(" + sampler + ')'; + return "itof(textureQueryLevels(" + sampler + "))"; } UNREACHABLE(); return "0"; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 72aca4938..4e04ab2f8 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -5,7 +5,6 @@ #pragma once #include <array> -#include <set> #include <string> #include <utility> #include <vector> diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 82fc4d44b..d2d979997 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -4,13 +4,13 @@ #include <cstring> #include <fmt/format.h> -#include <lz4.h> #include "common/assert.h" #include "common/common_paths.h" #include "common/common_types.h" #include "common/file_util.h" #include "common/logging/log.h" +#include "common/lz4_compression.h" #include "common/scm_rev.h" #include "core/core.h" @@ -49,39 +49,6 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { return hash; } -template <typename T> -std::vector<u8> CompressData(const T* source, std::size_t source_size) { - if (source_size > LZ4_MAX_INPUT_SIZE) { - // Source size exceeds LZ4 maximum input size - return {}; - } - const auto source_size_int = static_cast<int>(source_size); - const int max_compressed_size = LZ4_compressBound(source_size_int); - std::vector<u8> compressed(max_compressed_size); - const int compressed_size = LZ4_compress_default(reinterpret_cast<const char*>(source), - reinterpret_cast<char*>(compressed.data()), - source_size_int, max_compressed_size); - if (compressed_size <= 0) { - // Compression failed - return {}; - } - compressed.resize(compressed_size); - return compressed; -} - -std::vector<u8> DecompressData(const std::vector<u8>& compressed, std::size_t uncompressed_size) { - std::vector<u8> uncompressed(uncompressed_size); - const int size_check = LZ4_decompress_safe(reinterpret_cast<const char*>(compressed.data()), - reinterpret_cast<char*>(uncompressed.data()), - static_cast<int>(compressed.size()), - static_cast<int>(uncompressed.size())); - if (static_cast<int>(uncompressed_size) != size_check) { - // Decompression failed - return {}; - } - return uncompressed; -} - } // namespace ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, @@ -292,7 +259,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { return {}; } - dump.binary = DecompressData(compressed_binary, binary_length); + dump.binary = Common::Compression::DecompressDataLZ4(compressed_binary, binary_length); if (dump.binary.empty()) { return {}; } @@ -321,7 +288,7 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn return {}; } - const std::vector<u8> code = DecompressData(compressed_code, code_size); + const std::vector<u8> code = Common::Compression::DecompressDataLZ4(compressed_code, code_size); if (code.empty()) { return {}; } @@ -507,7 +474,8 @@ void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::str if (!IsUsable()) return; - const std::vector<u8> compressed_code{CompressData(code.data(), code.size())}; + const std::vector<u8> compressed_code{Common::Compression::CompressDataLZ4HC( + reinterpret_cast<const u8*>(code.data()), code.size(), 9)}; if (compressed_code.empty()) { LOG_ERROR(Render_OpenGL, "Failed to compress GLSL code - skipping shader {:016x}", unique_identifier); @@ -537,7 +505,9 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p std::vector<u8> binary(binary_length); glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); - const std::vector<u8> compressed_binary = CompressData(binary.data(), binary.size()); + const std::vector<u8> compressed_binary = + Common::Compression::CompressDataLZ4HC(binary.data(), binary.size(), 9); + if (compressed_binary.empty()) { LOG_ERROR(Render_OpenGL, "Failed to compress binary program in shader={:016x}", usage.unique_identifier); diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 7d96649af..8763d9c71 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -3,7 +3,6 @@ // Refer to the license.txt file included. #include <fmt/format.h> -#include "common/assert.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_gen.h" diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index fba8e681b..fad346b48 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -4,12 +4,9 @@ #pragma once -#include <array> -#include <string> #include <vector> #include "common/common_types.h" -#include "video_core/engines/shader_bytecode.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/shader/shader_ir.h" diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 6a30c28d2..eaf3e03a0 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -2,15 +2,15 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include "core/core.h" #include "video_core/renderer_opengl/gl_shader_manager.h" namespace OpenGL::GLShader { -void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) { - const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); - const auto& regs = gpu.regs; - const auto& state = gpu.state; +using Tegra::Engines::Maxwell3D; + +void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) { + const auto& regs = maxwell.regs; + const auto& state = maxwell.state; // TODO(bunnei): Support more than one viewport viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f; @@ -18,7 +18,7 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh u32 func = static_cast<u32>(regs.alpha_test_func); // Normalize the gl variants of opCompare to be the same as the normal variants - u32 op_gl_variant_base = static_cast<u32>(Tegra::Engines::Maxwell3D::Regs::ComparisonOp::Never); + const u32 op_gl_variant_base = static_cast<u32>(Maxwell3D::Regs::ComparisonOp::Never); if (func >= op_gl_variant_base) { func = func - op_gl_variant_base + 1U; } @@ -31,8 +31,9 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& sh // Assign in which stage the position has to be flipped // (the last stage before the fragment shader). - if (gpu.regs.shader_config[static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry)].enable) { - flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry); + constexpr u32 geometry_index = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry); + if (maxwell.regs.shader_config[geometry_index].enable) { + flip_stage = geometry_index; } else { flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB); } diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 4970aafed..8eef2a920 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -12,14 +12,13 @@ namespace OpenGL::GLShader { -using Tegra::Engines::Maxwell3D; - /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned -// NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at -// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. -// Not following that rule will cause problems on some AMD drivers. +/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at +/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. +/// Not following that rule will cause problems on some AMD drivers. struct MaxwellUniformData { - void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage); + void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell, std::size_t shader_stage); + alignas(16) GLvec4 viewport_flip; struct alignas(16) { GLuint instance_id; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 5e3d862c6..d69cba9c3 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -5,7 +5,6 @@ #include <algorithm> #include <cstddef> #include <cstdlib> -#include <cstring> #include <memory> #include <glad/glad.h> #include "common/assert.h" @@ -266,7 +265,7 @@ void RendererOpenGL::CreateRasterizer() { } // Initialize sRGB Usage OpenGLState::ClearsRGBUsed(); - rasterizer = std::make_unique<RasterizerOpenGL>(render_window, system, screen_info); + rasterizer = std::make_unique<RasterizerOpenGL>(system, screen_info); } void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index eac51ecb3..02a9f5ecb 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -10,6 +10,7 @@ #include "common/alignment.h" #include "common/assert.h" #include "core/memory.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -19,8 +20,8 @@ namespace Vulkan { CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment, u8* host_ptr) - : cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{ - host_ptr} {} + : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset}, + alignment{alignment} {} VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp new file mode 100644 index 000000000..08279e562 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -0,0 +1,210 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <array> +#include <limits> +#include <vector> + +#include "common/assert.h" +#include "common/logging/log.h" +#include "core/core.h" +#include "core/frontend/framebuffer_layout.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_swapchain.h" + +namespace Vulkan { + +namespace { +vk::SurfaceFormatKHR ChooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& formats) { + if (formats.size() == 1 && formats[0].format == vk::Format::eUndefined) { + return {vk::Format::eB8G8R8A8Unorm, vk::ColorSpaceKHR::eSrgbNonlinear}; + } + const auto& found = std::find_if(formats.begin(), formats.end(), [](const auto& format) { + return format.format == vk::Format::eB8G8R8A8Unorm && + format.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear; + }); + return found != formats.end() ? *found : formats[0]; +} + +vk::PresentModeKHR ChooseSwapPresentMode(const std::vector<vk::PresentModeKHR>& modes) { + // Mailbox doesn't lock the application like fifo (vsync), prefer it + const auto& found = std::find_if(modes.begin(), modes.end(), [](const auto& mode) { + return mode == vk::PresentModeKHR::eMailbox; + }); + return found != modes.end() ? *found : vk::PresentModeKHR::eFifo; +} + +vk::Extent2D ChooseSwapExtent(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, + u32 height) { + constexpr auto undefined_size{std::numeric_limits<u32>::max()}; + if (capabilities.currentExtent.width != undefined_size) { + return capabilities.currentExtent; + } + vk::Extent2D extent = {width, height}; + extent.width = std::max(capabilities.minImageExtent.width, + std::min(capabilities.maxImageExtent.width, extent.width)); + extent.height = std::max(capabilities.minImageExtent.height, + std::min(capabilities.maxImageExtent.height, extent.height)); + return extent; +} +} // namespace + +VKSwapchain::VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device) + : surface{surface}, device{device} {} + +VKSwapchain::~VKSwapchain() = default; + +void VKSwapchain::Create(u32 width, u32 height) { + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + const auto physical_device = device.GetPhysical(); + + const vk::SurfaceCapabilitiesKHR capabilities{ + physical_device.getSurfaceCapabilitiesKHR(surface, dld)}; + if (capabilities.maxImageExtent.width == 0 || capabilities.maxImageExtent.height == 0) { + return; + } + + dev.waitIdle(dld); + Destroy(); + + CreateSwapchain(capabilities, width, height); + CreateSemaphores(); + CreateImageViews(); + + fences.resize(image_count, nullptr); +} + +void VKSwapchain::AcquireNextImage() { + const auto dev{device.GetLogical()}; + const auto& dld{device.GetDispatchLoader()}; + dev.acquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(), + *present_semaphores[frame_index], {}, &image_index, dld); + + if (auto& fence = fences[image_index]; fence) { + fence->Wait(); + fence->Release(); + fence = nullptr; + } +} + +bool VKSwapchain::Present(vk::Semaphore render_semaphore, VKFence& fence) { + const vk::Semaphore present_semaphore{*present_semaphores[frame_index]}; + const std::array<vk::Semaphore, 2> semaphores{present_semaphore, render_semaphore}; + const u32 wait_semaphore_count{render_semaphore ? 2U : 1U}; + const auto& dld{device.GetDispatchLoader()}; + const auto present_queue{device.GetPresentQueue()}; + bool recreated = false; + + const vk::PresentInfoKHR present_info(wait_semaphore_count, semaphores.data(), 1, + &swapchain.get(), &image_index, {}); + switch (const auto result = present_queue.presentKHR(&present_info, dld); result) { + case vk::Result::eSuccess: + break; + case vk::Result::eErrorOutOfDateKHR: + if (current_width > 0 && current_height > 0) { + Create(current_width, current_height); + recreated = true; + } + break; + default: + LOG_CRITICAL(Render_Vulkan, "Vulkan failed to present swapchain due to {}!", + vk::to_string(result)); + UNREACHABLE(); + } + + ASSERT(fences[image_index] == nullptr); + fences[image_index] = &fence; + frame_index = (frame_index + 1) % image_count; + return recreated; +} + +bool VKSwapchain::HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const { + // TODO(Rodrigo): Handle framebuffer pixel format changes + return framebuffer.width != current_width || framebuffer.height != current_height; +} + +void VKSwapchain::CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, + u32 height) { + const auto dev{device.GetLogical()}; + const auto& dld{device.GetDispatchLoader()}; + const auto physical_device{device.GetPhysical()}; + + const std::vector<vk::SurfaceFormatKHR> formats{ + physical_device.getSurfaceFormatsKHR(surface, dld)}; + + const std::vector<vk::PresentModeKHR> present_modes{ + physical_device.getSurfacePresentModesKHR(surface, dld)}; + + const vk::SurfaceFormatKHR surface_format{ChooseSwapSurfaceFormat(formats)}; + const vk::PresentModeKHR present_mode{ChooseSwapPresentMode(present_modes)}; + extent = ChooseSwapExtent(capabilities, width, height); + + current_width = extent.width; + current_height = extent.height; + + u32 requested_image_count{capabilities.minImageCount + 1}; + if (capabilities.maxImageCount > 0 && requested_image_count > capabilities.maxImageCount) { + requested_image_count = capabilities.maxImageCount; + } + + vk::SwapchainCreateInfoKHR swapchain_ci( + {}, surface, requested_image_count, surface_format.format, surface_format.colorSpace, + extent, 1, vk::ImageUsageFlagBits::eColorAttachment, {}, {}, {}, + capabilities.currentTransform, vk::CompositeAlphaFlagBitsKHR::eOpaque, present_mode, false, + {}); + + const u32 graphics_family{device.GetGraphicsFamily()}; + const u32 present_family{device.GetPresentFamily()}; + const std::array<u32, 2> queue_indices{graphics_family, present_family}; + if (graphics_family != present_family) { + swapchain_ci.imageSharingMode = vk::SharingMode::eConcurrent; + swapchain_ci.queueFamilyIndexCount = static_cast<u32>(queue_indices.size()); + swapchain_ci.pQueueFamilyIndices = queue_indices.data(); + } else { + swapchain_ci.imageSharingMode = vk::SharingMode::eExclusive; + } + + swapchain = dev.createSwapchainKHRUnique(swapchain_ci, nullptr, dld); + + images = dev.getSwapchainImagesKHR(*swapchain, dld); + image_count = static_cast<u32>(images.size()); + image_format = surface_format.format; +} + +void VKSwapchain::CreateSemaphores() { + const auto dev{device.GetLogical()}; + const auto& dld{device.GetDispatchLoader()}; + + present_semaphores.resize(image_count); + for (std::size_t i = 0; i < image_count; i++) { + present_semaphores[i] = dev.createSemaphoreUnique({}, nullptr, dld); + } +} + +void VKSwapchain::CreateImageViews() { + const auto dev{device.GetLogical()}; + const auto& dld{device.GetDispatchLoader()}; + + image_views.resize(image_count); + for (std::size_t i = 0; i < image_count; i++) { + const vk::ImageViewCreateInfo image_view_ci({}, images[i], vk::ImageViewType::e2D, + image_format, {}, + {vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1}); + image_views[i] = dev.createImageViewUnique(image_view_ci, nullptr, dld); + } +} + +void VKSwapchain::Destroy() { + frame_index = 0; + present_semaphores.clear(); + framebuffers.clear(); + image_views.clear(); + swapchain.reset(); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h new file mode 100644 index 000000000..2ad84f185 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -0,0 +1,92 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> + +#include "common/common_types.h" +#include "video_core/renderer_vulkan/declarations.h" + +namespace Layout { +struct FramebufferLayout; +} + +namespace Vulkan { + +class VKDevice; +class VKFence; + +class VKSwapchain { +public: + explicit VKSwapchain(vk::SurfaceKHR surface, const VKDevice& device); + ~VKSwapchain(); + + /// Creates (or recreates) the swapchain with a given size. + void Create(u32 width, u32 height); + + /// Acquires the next image in the swapchain, waits as needed. + void AcquireNextImage(); + + /// Presents the rendered image to the swapchain. Returns true when the swapchains had to be + /// recreated. Takes responsability for the ownership of fence. + bool Present(vk::Semaphore render_semaphore, VKFence& fence); + + /// Returns true when the framebuffer layout has changed. + bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const; + + const vk::Extent2D& GetSize() const { + return extent; + } + + u32 GetImageCount() const { + return image_count; + } + + u32 GetImageIndex() const { + return image_index; + } + + vk::Image GetImageIndex(u32 index) const { + return images[index]; + } + + vk::ImageView GetImageViewIndex(u32 index) const { + return *image_views[index]; + } + + vk::Format GetImageFormat() const { + return image_format; + } + +private: + void CreateSwapchain(const vk::SurfaceCapabilitiesKHR& capabilities, u32 width, u32 height); + void CreateSemaphores(); + void CreateImageViews(); + + void Destroy(); + + const vk::SurfaceKHR surface; + const VKDevice& device; + + UniqueSwapchainKHR swapchain; + + u32 image_count{}; + std::vector<vk::Image> images; + std::vector<UniqueImageView> image_views; + std::vector<UniqueFramebuffer> framebuffers; + std::vector<VKFence*> fences; + std::vector<UniqueSemaphore> present_semaphores; + + u32 image_index{}; + u32 frame_index{}; + + vk::Format image_format{}; + vk::Extent2D extent{}; + + u32 current_width{}; + u32 current_height{}; +}; + +} // namespace Vulkan |