diff options
Diffstat (limited to 'src/video_core')
122 files changed, 1805 insertions, 2093 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3cd896a0f..3df54816d 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,3 +1,5 @@ +add_subdirectory(host_shaders) + add_library(video_core STATIC buffer_cache/buffer_block.h buffer_cache/buffer_cache.h @@ -188,6 +190,8 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_blit_screen.h renderer_vulkan/vk_buffer_cache.cpp renderer_vulkan/vk_buffer_cache.h + renderer_vulkan/vk_command_pool.cpp + renderer_vulkan/vk_command_pool.h renderer_vulkan/vk_compute_pass.cpp renderer_vulkan/vk_compute_pass.h renderer_vulkan/vk_compute_pipeline.cpp @@ -202,6 +206,8 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_graphics_pipeline.h renderer_vulkan/vk_image.cpp renderer_vulkan/vk_image.h + renderer_vulkan/vk_master_semaphore.cpp + renderer_vulkan/vk_master_semaphore.h renderer_vulkan/vk_memory_manager.cpp renderer_vulkan/vk_memory_manager.h renderer_vulkan/vk_pipeline_cache.cpp @@ -212,8 +218,8 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_rasterizer.h renderer_vulkan/vk_renderpass_cache.cpp renderer_vulkan/vk_renderpass_cache.h - renderer_vulkan/vk_resource_manager.cpp - renderer_vulkan/vk_resource_manager.h + renderer_vulkan/vk_resource_pool.cpp + renderer_vulkan/vk_resource_pool.h renderer_vulkan/vk_sampler_cache.cpp renderer_vulkan/vk_sampler_cache.h renderer_vulkan/vk_scheduler.cpp @@ -244,6 +250,9 @@ create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) target_link_libraries(video_core PRIVATE glad xbyak) +add_dependencies(video_core host_shaders) +target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) + if (ENABLE_VULKAN) target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) target_compile_definitions(video_core PRIVATE HAS_VULKAN) @@ -264,5 +273,12 @@ endif() if (MSVC) target_compile_options(video_core PRIVATE /we4267) else() - target_compile_options(video_core PRIVATE -Werror=conversion -Wno-error=sign-conversion) + target_compile_options(video_core PRIVATE + -Werror=conversion + -Wno-error=sign-conversion + -Werror=switch + -Werror=unused-variable + -Werror=unused-but-set-variable + -Werror=class-memaccess + ) endif() diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b5dc68902..e7edd733f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -51,46 +51,43 @@ public: bool is_written = false, bool use_fast_cbuf = false) { std::lock_guard lock{mutex}; - auto& memory_manager = system.GPU().MemoryManager(); - const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); - if (!cpu_addr_opt) { + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { return GetEmptyBuffer(size); } - const VAddr cpu_addr = *cpu_addr_opt; // Cache management is a big overhead, so only cache entries with a given size. // TODO: Figure out which size is the best for given games. constexpr std::size_t max_stream_size = 0x800; if (use_fast_cbuf || size < max_stream_size) { - if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) { - const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size); + if (!is_written && !IsRegionWritten(*cpu_addr, *cpu_addr + size - 1)) { + const bool is_granular = gpu_memory.IsGranularRange(gpu_addr, size); if (use_fast_cbuf) { u8* dest; if (is_granular) { - dest = memory_manager.GetPointer(gpu_addr); + dest = gpu_memory.GetPointer(gpu_addr); } else { staging_buffer.resize(size); dest = staging_buffer.data(); - memory_manager.ReadBlockUnsafe(gpu_addr, dest, size); + gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size); } return ConstBufferUpload(dest, size); } if (is_granular) { - u8* const host_ptr = memory_manager.GetPointer(gpu_addr); + u8* const host_ptr = gpu_memory.GetPointer(gpu_addr); return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) { std::memcpy(dest, host_ptr, size); }); } else { - return StreamBufferUpload( - size, alignment, [&memory_manager, gpu_addr, size](u8* dest) { - memory_manager.ReadBlockUnsafe(gpu_addr, dest, size); - }); + return StreamBufferUpload(size, alignment, [this, gpu_addr, size](u8* dest) { + gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size); + }); } } } - Buffer* const block = GetBlock(cpu_addr, size); - MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); + Buffer* const block = GetBlock(*cpu_addr, size); + MapInterval* const map = MapAddress(block, gpu_addr, *cpu_addr, size); if (!map) { return GetEmptyBuffer(size); } @@ -106,7 +103,7 @@ public: } } - return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()}; + return BufferInfo{block->Handle(), block->Offset(*cpu_addr), block->Address()}; } /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. @@ -262,9 +259,11 @@ public: virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0; protected: - explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, - std::unique_ptr<StreamBuffer> stream_buffer) - : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {} + explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, + std::unique_ptr<StreamBuffer> stream_buffer_) + : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, + stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {} ~BufferCache() = default; @@ -326,14 +325,13 @@ private: MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) { const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size); if (overlaps.empty()) { - auto& memory_manager = system.GPU().MemoryManager(); const VAddr cpu_addr_end = cpu_addr + size; - if (memory_manager.IsGranularRange(gpu_addr, size)) { - u8* host_ptr = memory_manager.GetPointer(gpu_addr); + if (gpu_memory.IsGranularRange(gpu_addr, size)) { + u8* const host_ptr = gpu_memory.GetPointer(gpu_addr); block->Upload(block->Offset(cpu_addr), size, host_ptr); } else { staging_buffer.resize(size); - memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); + gpu_memory.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); block->Upload(block->Offset(cpu_addr), size, staging_buffer.data()); } return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); @@ -392,7 +390,7 @@ private: continue; } staging_buffer.resize(size); - system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); + cpu_memory.ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size); block->Upload(block->Offset(interval.lower()), size, staging_buffer.data()); } } @@ -431,7 +429,7 @@ private: const std::size_t size = map->end - map->start; staging_buffer.resize(size); block->Download(block->Offset(map->start), size, staging_buffer.data()); - system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size); + cpu_memory.WriteBlockUnsafe(map->start, staging_buffer.data(), size); map->MarkAsModified(false, 0); } @@ -567,7 +565,8 @@ private: } VideoCore::RasterizerInterface& rasterizer; - Core::System& system; + Tegra::MemoryManager& gpu_memory; + Core::Memory::Memory& cpu_memory; std::unique_ptr<StreamBuffer> stream_buffer; BufferType stream_buffer_handle; diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index ff10ff40d..9409c4075 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -10,7 +10,13 @@ namespace Tegra::Engines { -Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} +Fermi2D::Fermi2D() = default; + +Fermi2D::~Fermi2D() = default; + +void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { + rasterizer = &rasterizer_; +} void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { ASSERT_MSG(method < Regs::NUM_REGS, @@ -81,13 +87,13 @@ void Fermi2D::HandleSurfaceCopy() { const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2}; const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2, dst_blit_y2}; - Config copy_config; - copy_config.operation = regs.operation; - copy_config.filter = regs.blit_control.filter; - copy_config.src_rect = src_rect; - copy_config.dst_rect = dst_rect; - - if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) { + const Config copy_config{ + .operation = regs.operation, + .filter = regs.blit_control.filter, + .src_rect = src_rect, + .dst_rect = dst_rect, + }; + if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) { UNIMPLEMENTED(); } } diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 8f37d053f..0909709ec 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -34,8 +34,11 @@ namespace Tegra::Engines { class Fermi2D final : public EngineInterface { public: - explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer); - ~Fermi2D() = default; + explicit Fermi2D(); + ~Fermi2D(); + + /// Binds a rasterizer to this engine. + void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); /// Write the value to the register identified by method. void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; @@ -142,14 +145,14 @@ public: } regs{}; struct Config { - Operation operation; - Filter filter; + Operation operation{}; + Filter filter{}; Common::Rectangle<u32> src_rect; Common::Rectangle<u32> dst_rect; }; private: - VideoCore::RasterizerInterface& rasterizer; + VideoCore::RasterizerInterface* rasterizer; /// Performs the copy from the source surface to the destination surface as configured in the /// registers. diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index a82b06a38..898370739 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -16,14 +16,15 @@ namespace Tegra::Engines { -KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - MemoryManager& memory_manager) - : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{ - memory_manager, - regs.upload} {} +KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_) + : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {} KeplerCompute::~KeplerCompute() = default; +void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { + rasterizer = &rasterizer_; +} + void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid KeplerCompute register, increase the size of the Regs structure"); @@ -104,11 +105,11 @@ SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { } VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() { - return rasterizer.AccessGuestDriverProfile(); + return rasterizer->AccessGuestDriverProfile(); } const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const { - return rasterizer.AccessGuestDriverProfile(); + return rasterizer->AccessGuestDriverProfile(); } void KeplerCompute::ProcessLaunch() { @@ -119,7 +120,7 @@ void KeplerCompute::ProcessLaunch() { const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start; LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr); - rasterizer.DispatchCompute(code_addr); + rasterizer->DispatchCompute(code_addr); } Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index b7f668d88..7f2500aab 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -42,10 +42,12 @@ namespace Tegra::Engines { class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface { public: - explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - MemoryManager& memory_manager); + explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); ~KeplerCompute(); + /// Binds a rasterizer to this engine. + void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); + static constexpr std::size_t NumConstBuffers = 8; struct Regs { @@ -230,11 +232,6 @@ public: const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; private: - Core::System& system; - VideoCore::RasterizerInterface& rasterizer; - MemoryManager& memory_manager; - Upload::State upload_state; - void ProcessLaunch(); /// Retrieves information about a specific TIC entry from the TIC buffer. @@ -242,6 +239,11 @@ private: /// Retrieves information about a specific TSC entry from the TSC buffer. Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; + + Core::System& system; + MemoryManager& memory_manager; + VideoCore::RasterizerInterface* rasterizer = nullptr; + Upload::State upload_state; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index c01436295..57ebc785f 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -22,14 +22,19 @@ using VideoCore::QueryType; /// First register id that is actually a Macro call. constexpr u32 MacroRegistersStart = 0xE00; -Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - MemoryManager& memory_manager) - : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, - macro_engine{GetMacroEngine(*this)}, upload_state{memory_manager, regs.upload} { +Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_) + : system{system_}, memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)}, + upload_state{memory_manager, regs.upload} { dirty.flags.flip(); InitializeRegisterDefaults(); } +Maxwell3D::~Maxwell3D() = default; + +void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { + rasterizer = &rasterizer_; +} + void Maxwell3D::InitializeRegisterDefaults() { // Initializes registers to their default values - what games expect them to be at boot. This is // for certain registers that may not be explicitly set by games. @@ -192,7 +197,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { switch (method) { case MAXWELL3D_REG_INDEX(wait_for_idle): { - rasterizer.WaitForIdle(); + rasterizer->WaitForIdle(); break; } case MAXWELL3D_REG_INDEX(shadow_ram_control): { @@ -402,7 +407,7 @@ void Maxwell3D::FlushMMEInlineDraw() { const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed; if (ShouldExecute()) { - rasterizer.Draw(is_indexed, true); + rasterizer->Draw(is_indexed, true); } // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if @@ -465,7 +470,7 @@ void Maxwell3D::ProcessQueryGet() { switch (regs.query.query_get.operation) { case Regs::QueryOperation::Release: if (regs.query.query_get.fence == 1) { - rasterizer.SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence); + rasterizer->SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence); } else { StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); } @@ -533,7 +538,7 @@ void Maxwell3D::ProcessQueryCondition() { void Maxwell3D::ProcessCounterReset() { switch (regs.counter_reset) { case Regs::CounterReset::SampleCnt: - rasterizer.ResetCounter(QueryType::SamplesPassed); + rasterizer->ResetCounter(QueryType::SamplesPassed); break; default: LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", @@ -547,7 +552,7 @@ void Maxwell3D::ProcessSyncPoint() { const u32 increment = regs.sync_info.increment.Value(); [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value(); if (increment) { - rasterizer.SignalSyncPoint(sync_point); + rasterizer->SignalSyncPoint(sync_point); } } @@ -570,7 +575,7 @@ void Maxwell3D::DrawArrays() { const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count}; if (ShouldExecute()) { - rasterizer.Draw(is_indexed, false); + rasterizer->Draw(is_indexed, false); } // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if @@ -590,9 +595,9 @@ std::optional<u64> Maxwell3D::GetQueryResult() { return 0; case Regs::QuerySelect::SamplesPassed: // Deferred. - rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed, - system.GPU().GetTicks()); - return {}; + rasterizer->Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed, + system.GPU().GetTicks()); + return std::nullopt; default: LOG_DEBUG(HW_GPU, "Unimplemented query select type {}", static_cast<u32>(regs.query.query_get.select.Value())); @@ -718,7 +723,7 @@ void Maxwell3D::ProcessClearBuffers() { regs.clear_buffers.R == regs.clear_buffers.B && regs.clear_buffers.R == regs.clear_buffers.A); - rasterizer.Clear(); + rasterizer->Clear(); } u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { @@ -752,11 +757,11 @@ SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { } VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() { - return rasterizer.AccessGuestDriverProfile(); + return rasterizer->AccessGuestDriverProfile(); } const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const { - return rasterizer.AccessGuestDriverProfile(); + return rasterizer->AccessGuestDriverProfile(); } } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index c97eeb792..bc289c55d 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -51,9 +51,11 @@ namespace Tegra::Engines { class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface { public: - explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - MemoryManager& memory_manager); - ~Maxwell3D() = default; + explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager); + ~Maxwell3D(); + + /// Binds a rasterizer to this engine. + void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); /// Register structure of the Maxwell3D engine. /// TODO(Subv): This structure will need to be made bigger as more registers are discovered. @@ -1418,12 +1420,12 @@ public: return execute_on; } - VideoCore::RasterizerInterface& GetRasterizer() { - return rasterizer; + VideoCore::RasterizerInterface& Rasterizer() { + return *rasterizer; } - const VideoCore::RasterizerInterface& GetRasterizer() const { - return rasterizer; + const VideoCore::RasterizerInterface& Rasterizer() const { + return *rasterizer; } /// Notify a memory write has happened. @@ -1460,11 +1462,10 @@ private: void InitializeRegisterDefaults(); Core::System& system; - - VideoCore::RasterizerInterface& rasterizer; - MemoryManager& memory_manager; + VideoCore::RasterizerInterface* rasterizer = nullptr; + /// Start offsets of each macro in macro_memory std::array<u32, 0x80> macro_positions = {}; diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index e88290754..8fa359d0a 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -114,8 +114,6 @@ void MaxwellDMA::CopyBlockLinearToPitch() { const u32 block_depth = src_params.block_size.depth; const size_t src_size = CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); - const size_t src_layer_size = - CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth); if (read_buffer.size() < src_size) { read_buffer.resize(src_size); diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h index 72e2a33d5..ceec05459 100644 --- a/src/video_core/engines/shader_header.h +++ b/src/video_core/engines/shader_header.h @@ -41,30 +41,30 @@ struct Header { BitField<26, 1, u32> does_load_or_store; BitField<27, 1, u32> does_fp64; BitField<28, 4, u32> stream_out_mask; - } common0{}; + } common0; union { BitField<0, 24, u32> shader_local_memory_low_size; BitField<24, 8, u32> per_patch_attribute_count; - } common1{}; + } common1; union { BitField<0, 24, u32> shader_local_memory_high_size; BitField<24, 8, u32> threads_per_input_primitive; - } common2{}; + } common2; union { BitField<0, 24, u32> shader_local_memory_crs_size; BitField<24, 4, OutputTopology> output_topology; BitField<28, 4, u32> reserved; - } common3{}; + } common3; union { BitField<0, 12, u32> max_output_vertices; BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders. BitField<20, 4, u32> reserved; BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders. - } common4{}; + } common4; union { struct { @@ -145,7 +145,7 @@ struct Header { } } ps; - std::array<u32, 0xF> raw{}; + std::array<u32, 0xF> raw; }; u64 GetLocalMemorySize() const { @@ -153,7 +153,6 @@ struct Header { (common2.shader_local_memory_high_size << 24)); } }; - static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); } // namespace Tegra::Shader diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index 8b2a6a42c..de6991ef6 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h @@ -5,15 +5,10 @@ #pragma once #include <algorithm> -#include <array> -#include <memory> #include <queue> -#include "common/assert.h" #include "common/common_types.h" #include "core/core.h" -#include "core/memory.h" -#include "core/settings.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" @@ -79,8 +74,6 @@ public: } void WaitPendingFences() { - auto& gpu{system.GPU()}; - auto& memory_manager{gpu.MemoryManager()}; while (!fences.empty()) { TFence& current_fence = fences.front(); if (ShouldWait()) { @@ -88,8 +81,8 @@ public: } PopAsyncFlushes(); if (current_fence->IsSemaphore()) { - memory_manager.template Write<u32>(current_fence->GetAddress(), - current_fence->GetPayload()); + gpu_memory.template Write<u32>(current_fence->GetAddress(), + current_fence->GetPayload()); } else { gpu.IncrementSyncPoint(current_fence->GetPayload()); } @@ -98,13 +91,13 @@ public: } protected: - FenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - TTextureCache& texture_cache, TTBufferCache& buffer_cache, - TQueryCache& query_cache) - : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache}, - buffer_cache{buffer_cache}, query_cache{query_cache} {} + explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, + TTextureCache& texture_cache_, TTBufferCache& buffer_cache_, + TQueryCache& query_cache_) + : rasterizer{rasterizer_}, gpu{gpu_}, gpu_memory{gpu.MemoryManager()}, + texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {} - virtual ~FenceManager() {} + virtual ~FenceManager() = default; /// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is /// true @@ -118,16 +111,15 @@ protected: /// Waits until a fence has been signalled by the host GPU. virtual void WaitFence(TFence& fence) = 0; - Core::System& system; VideoCore::RasterizerInterface& rasterizer; + Tegra::GPU& gpu; + Tegra::MemoryManager& gpu_memory; TTextureCache& texture_cache; TTBufferCache& buffer_cache; TQueryCache& query_cache; private: void TryReleasePendingFences() { - auto& gpu{system.GPU()}; - auto& memory_manager{gpu.MemoryManager()}; while (!fences.empty()) { TFence& current_fence = fences.front(); if (ShouldWait() && !IsFenceSignaled(current_fence)) { @@ -135,8 +127,8 @@ private: } PopAsyncFlushes(); if (current_fence->IsSemaphore()) { - memory_manager.template Write<u32>(current_fence->GetAddress(), - current_fence->GetPayload()); + gpu_memory.template Write<u32>(current_fence->GetAddress(), + current_fence->GetPayload()); } else { gpu.IncrementSyncPoint(current_fence->GetPayload()); } diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 512578c8b..4bb9256e9 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -27,21 +27,28 @@ namespace Tegra { MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); -GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, bool is_async) - : system{system}, renderer{std::move(renderer_)}, is_async{is_async} { - auto& rasterizer{renderer->Rasterizer()}; - memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); - dma_pusher = std::make_unique<Tegra::DmaPusher>(system, *this); - maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); - fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer); - kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); - maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager); - kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); - shader_notify = std::make_unique<VideoCore::ShaderNotify>(); -} +GPU::GPU(Core::System& system_, bool is_async_) + : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)}, + dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, + maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)}, + fermi_2d{std::make_unique<Engines::Fermi2D>()}, + kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, + maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)}, + kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)}, + shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {} GPU::~GPU() = default; +void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { + renderer = std::move(renderer_); + + VideoCore::RasterizerInterface& rasterizer = renderer->Rasterizer(); + memory_manager->BindRasterizer(rasterizer); + maxwell_3d->BindRasterizer(rasterizer); + fermi_2d->BindRasterizer(rasterizer); + kepler_compute->BindRasterizer(rasterizer); +} + Engines::Maxwell3D& GPU::Maxwell3D() { return *maxwell_3d; } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index ebfc7b0c7..2d15d1c6f 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -142,11 +142,6 @@ class MemoryManager; class GPU { public: - explicit GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, - bool is_async); - - virtual ~GPU(); - struct MethodCall { u32 method{}; u32 argument{}; @@ -162,6 +157,12 @@ public: method_count(method_count) {} }; + explicit GPU(Core::System& system, bool is_async); + virtual ~GPU(); + + /// Binds a renderer to the GPU. + void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer); + /// Calls a GPU method. void CallMethod(const MethodCall& method_call); @@ -345,13 +346,12 @@ private: bool ExecuteMethodOnEngine(u32 method); protected: - std::unique_ptr<Tegra::DmaPusher> dma_pusher; Core::System& system; + std::unique_ptr<Tegra::MemoryManager> memory_manager; + std::unique_ptr<Tegra::DmaPusher> dma_pusher; std::unique_ptr<VideoCore::RendererBase> renderer; private: - std::unique_ptr<Tegra::MemoryManager> memory_manager; - /// Mapping of command subchannels to their bound engine ids std::array<EngineID, 8> bound_engines = {}; /// 3D engine diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 7b855f63e..70a3d5738 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -10,16 +10,14 @@ namespace VideoCommon { -GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, - std::unique_ptr<Core::Frontend::GraphicsContext>&& context) - : GPU(system, std::move(renderer_), true), gpu_thread{system}, - cpu_context(renderer->GetRenderWindow().CreateSharedContext()), - gpu_context(std::move(context)) {} +GPUAsynch::GPUAsynch(Core::System& system) : GPU{system, true}, gpu_thread{system} {} GPUAsynch::~GPUAsynch() = default; void GPUAsynch::Start() { - gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher); + gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher); + cpu_context = renderer->GetRenderWindow().CreateSharedContext(); + cpu_context->MakeCurrent(); } void GPUAsynch::ObtainContext() { diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 15e9f1d38..f89c855a5 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -20,8 +20,7 @@ namespace VideoCommon { /// Implementation of GPU interface that runs the GPU asynchronously class GPUAsynch final : public Tegra::GPU { public: - explicit GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, - std::unique_ptr<Core::Frontend::GraphicsContext>&& context); + explicit GPUAsynch(Core::System& system); ~GPUAsynch() override; void Start() override; @@ -42,7 +41,6 @@ protected: private: GPUThread::ThreadManager gpu_thread; std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context; - std::unique_ptr<Core::Frontend::GraphicsContext> gpu_context; }; } // namespace VideoCommon diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index aaeb9811d..1ca47ddef 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp @@ -7,20 +7,18 @@ namespace VideoCommon { -GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, - std::unique_ptr<Core::Frontend::GraphicsContext>&& context) - : GPU(system, std::move(renderer), false), context{std::move(context)} {} +GPUSynch::GPUSynch(Core::System& system) : GPU{system, false} {} GPUSynch::~GPUSynch() = default; void GPUSynch::Start() {} void GPUSynch::ObtainContext() { - context->MakeCurrent(); + renderer->Context().MakeCurrent(); } void GPUSynch::ReleaseContext() { - context->DoneCurrent(); + renderer->Context().DoneCurrent(); } void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 762c20aa5..297258cb1 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -19,8 +19,7 @@ namespace VideoCommon { /// Implementation of GPU interface that runs the GPU synchronously class GPUSynch final : public Tegra::GPU { public: - explicit GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, - std::unique_ptr<Core::Frontend::GraphicsContext>&& context); + explicit GPUSynch(Core::System& system); ~GPUSynch() override; void Start() override; @@ -36,9 +35,6 @@ public: protected: void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, [[maybe_unused]] u32 value) const override {} - -private: - std::unique_ptr<Core::Frontend::GraphicsContext> context; }; } // namespace VideoCommon diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt new file mode 100644 index 000000000..aa62363a7 --- /dev/null +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -0,0 +1,43 @@ +set(SHADER_FILES + opengl_present.frag + opengl_present.vert +) + +set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include) +set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) + +set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders) +add_custom_command( + OUTPUT + ${SHADER_DIR} + COMMAND + ${CMAKE_COMMAND} -E make_directory ${SHADER_DIR} +) + +set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in) +set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake) + +foreach(FILENAME IN ITEMS ${SHADER_FILES}) + string(REPLACE "." "_" SHADER_NAME ${FILENAME}) + set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) + set(HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) + add_custom_command( + OUTPUT + ${HEADER_FILE} + COMMAND + ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${HEADER_FILE} ${INPUT_FILE} + MAIN_DEPENDENCY + ${SOURCE_FILE} + DEPENDS + ${HEADER_GENERATOR} + ${INPUT_FILE} + ) + set(SHADER_HEADERS ${SHADER_HEADERS} ${HEADER_FILE}) +endforeach() + +add_custom_target(host_shaders + DEPENDS + ${SHADER_HEADERS} + SOURCES + ${SHADER_FILES} +) diff --git a/src/video_core/host_shaders/StringShaderHeader.cmake b/src/video_core/host_shaders/StringShaderHeader.cmake new file mode 100644 index 000000000..368bce0ed --- /dev/null +++ b/src/video_core/host_shaders/StringShaderHeader.cmake @@ -0,0 +1,11 @@ +set(SOURCE_FILE ${CMAKE_ARGV3}) +set(HEADER_FILE ${CMAKE_ARGV4}) +set(INPUT_FILE ${CMAKE_ARGV5}) + +get_filename_component(CONTENTS_NAME ${SOURCE_FILE} NAME) +string(REPLACE "." "_" CONTENTS_NAME ${CONTENTS_NAME}) +string(TOUPPER ${CONTENTS_NAME} CONTENTS_NAME) + +file(READ ${SOURCE_FILE} CONTENTS) + +configure_file(${INPUT_FILE} ${HEADER_FILE} @ONLY) diff --git a/src/video_core/host_shaders/opengl_present.frag b/src/video_core/host_shaders/opengl_present.frag new file mode 100644 index 000000000..8a4cb024b --- /dev/null +++ b/src/video_core/host_shaders/opengl_present.frag @@ -0,0 +1,10 @@ +#version 430 core + +layout (location = 0) in vec2 frag_tex_coord; +layout (location = 0) out vec4 color; + +layout (binding = 0) uniform sampler2D color_texture; + +void main() { + color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f); +} diff --git a/src/video_core/host_shaders/opengl_present.vert b/src/video_core/host_shaders/opengl_present.vert new file mode 100644 index 000000000..2235d31a4 --- /dev/null +++ b/src/video_core/host_shaders/opengl_present.vert @@ -0,0 +1,24 @@ +#version 430 core + +out gl_PerVertex { + vec4 gl_Position; +}; + +layout (location = 0) in vec2 vert_position; +layout (location = 1) in vec2 vert_tex_coord; +layout (location = 0) out vec2 frag_tex_coord; + +// This is a truncated 3x3 matrix for 2D transformations: +// The upper-left 2x2 submatrix performs scaling/rotation/mirroring. +// The third column performs translation. +// The third row could be used for projection, which we don't need in 2D. It hence is assumed to +// implicitly be [0, 0, 1] +layout (location = 0) uniform mat3x2 modelview_matrix; + +void main() { + // Multiply input position by the rotscale part of the matrix and then manually translate by + // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector + // to `vec3(vert_position.xy, 1.0)` + gl_Position = vec4(mat2(modelview_matrix) * vert_position + modelview_matrix[2], 0.0, 1.0); + frag_tex_coord = vert_tex_coord; +} diff --git a/src/video_core/host_shaders/source_shader.h.in b/src/video_core/host_shaders/source_shader.h.in new file mode 100644 index 000000000..ccdb0d2a9 --- /dev/null +++ b/src/video_core/host_shaders/source_shader.h.in @@ -0,0 +1,9 @@ +#pragma once + +#include <string_view> + +namespace HostShaders { + +constexpr std::string_view @CONTENTS_NAME@ = R"(@CONTENTS@)"; + +} // namespace HostShaders diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index a50e7b4e0..cd21a2112 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -36,7 +36,7 @@ void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, } } else { // Macro not compiled, check if it's uploaded and if so, compile it - std::optional<u32> mid_method = std::nullopt; + std::optional<u32> mid_method; const auto macro_code = uploaded_macro_code.find(method); if (macro_code == uploaded_macro_code.end()) { for (const auto& [method_base, code] : uploaded_macro_code) { diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 0c9ff59a4..df00b57df 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -24,7 +24,7 @@ void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& maxwell3d.regs.index_array.first = parameters[4]; if (maxwell3d.ShouldExecute()) { - maxwell3d.GetRasterizer().Draw(true, true); + maxwell3d.Rasterizer().Draw(true, true); } maxwell3d.regs.index_array.count = 0; maxwell3d.mme_draw.instance_count = 0; @@ -42,7 +42,7 @@ void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& maxwell3d.mme_draw.instance_count = count; if (maxwell3d.ShouldExecute()) { - maxwell3d.GetRasterizer().Draw(false, true); + maxwell3d.Rasterizer().Draw(false, true); } maxwell3d.regs.vertex_buffer.count = 0; maxwell3d.mme_draw.instance_count = 0; @@ -65,7 +65,7 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& maxwell3d.regs.draw.topology.Assign( static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0])); if (maxwell3d.ShouldExecute()) { - maxwell3d.GetRasterizer().Draw(true, true); + maxwell3d.Rasterizer().Draw(true, true); } maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base? maxwell3d.regs.index_array.count = 0; diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp index aa5256419..bd01fd1f2 100644 --- a/src/video_core/macro/macro_interpreter.cpp +++ b/src/video_core/macro/macro_interpreter.cpp @@ -34,7 +34,6 @@ void MacroInterpreterImpl::Execute(const std::vector<u32>& parameters, u32 metho this->parameters = std::make_unique<u32[]>(num_parameters); } std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32)); - this->num_parameters = num_parameters; // Execute the code until we hit an exit condition. bool keep_executing = true; diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp index c1b9e4ad9..954b87515 100644 --- a/src/video_core/macro/macro_jit_x64.cpp +++ b/src/video_core/macro/macro_jit_x64.cpp @@ -14,11 +14,11 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255 MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0)); namespace Tegra { -static const Xbyak::Reg64 STATE = Xbyak::util::rbx; -static const Xbyak::Reg32 RESULT = Xbyak::util::ebp; -static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; -static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; -static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; +constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx; +constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp; +constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12; +constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d; +constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15; static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({ STATE, diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 844164645..02cf53d15 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -14,11 +14,15 @@ namespace Tegra { -MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer) - : system{system}, rasterizer{rasterizer}, page_table(page_table_size) {} +MemoryManager::MemoryManager(Core::System& system_) + : system{system_}, page_table(page_table_size) {} MemoryManager::~MemoryManager() = default; +void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { + rasterizer = &rasterizer_; +} + GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) { u64 remaining_size{size}; for (u64 offset{}; offset < size; offset += page_size) { @@ -54,7 +58,7 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { std::optional<GPUVAddr> MemoryManager::AllocateFixed(GPUVAddr gpu_addr, std::size_t size) { for (u64 offset{}; offset < size; offset += page_size) { if (!GetPageEntry(gpu_addr + offset).IsUnmapped()) { - return {}; + return std::nullopt; } } @@ -131,13 +135,13 @@ std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size } } - return {}; + return std::nullopt; } std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { const auto page_entry{GetPageEntry(gpu_addr)}; if (!page_entry.IsValid()) { - return {}; + return std::nullopt; } return page_entry.ToAddress() + (gpu_addr & page_mask); @@ -217,7 +221,7 @@ void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::siz // Flush must happen on the rasterizer interface, such that memory is always synchronous // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. - rasterizer.FlushRegion(src_addr, copy_amount); + rasterizer->FlushRegion(src_addr, copy_amount); system.Memory().ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); } @@ -266,7 +270,7 @@ void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, s // Invalidate must happen on the rasterizer interface, such that memory is always // synchronous when it is written (even when in asynchronous GPU mode). - rasterizer.InvalidateRegion(dest_addr, copy_amount); + rasterizer->InvalidateRegion(dest_addr, copy_amount); system.Memory().WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); } @@ -312,10 +316,10 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_add WriteBlockUnsafe(gpu_dest_addr, tmp_buffer.data(), size); } -bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) { +bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { const auto cpu_addr{GpuToCpuAddress(gpu_addr)}; if (!cpu_addr) { - return {}; + return false; } const std::size_t page{(*cpu_addr & Core::Memory::PAGE_MASK) + size}; return page <= Core::Memory::PAGE_SIZE; diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 681bd9588..53c8d122a 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -31,19 +31,19 @@ public: constexpr PageEntry(State state) : state{state} {} constexpr PageEntry(VAddr addr) : state{static_cast<State>(addr >> ShiftBits)} {} - constexpr bool IsUnmapped() const { + [[nodiscard]] constexpr bool IsUnmapped() const { return state == State::Unmapped; } - constexpr bool IsAllocated() const { + [[nodiscard]] constexpr bool IsAllocated() const { return state == State::Allocated; } - constexpr bool IsValid() const { + [[nodiscard]] constexpr bool IsValid() const { return !IsUnmapped() && !IsAllocated(); } - constexpr VAddr ToAddress() const { + [[nodiscard]] constexpr VAddr ToAddress() const { if (!IsValid()) { return {}; } @@ -51,7 +51,7 @@ public: return static_cast<VAddr>(state) << ShiftBits; } - constexpr PageEntry operator+(u64 offset) { + [[nodiscard]] constexpr PageEntry operator+(u64 offset) const { // If this is a reserved value, offsets do not apply if (!IsValid()) { return *this; @@ -68,19 +68,22 @@ static_assert(sizeof(PageEntry) == 4, "PageEntry is too large"); class MemoryManager final { public: - explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer); + explicit MemoryManager(Core::System& system); ~MemoryManager(); - std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; + /// Binds a renderer to the memory manager. + void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); + + [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; template <typename T> - T Read(GPUVAddr addr) const; + [[nodiscard]] T Read(GPUVAddr addr) const; template <typename T> void Write(GPUVAddr addr, T data); - u8* GetPointer(GPUVAddr addr); - const u8* GetPointer(GPUVAddr addr) const; + [[nodiscard]] u8* GetPointer(GPUVAddr addr); + [[nodiscard]] const u8* GetPointer(GPUVAddr addr) const; /** * ReadBlock and WriteBlock are full read and write operations over virtual @@ -109,24 +112,24 @@ public: /** * IsGranularRange checks if a gpu region can be simply read with a pointer. */ - bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size); + [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const; - GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size); - GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); - std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size); - GPUVAddr Allocate(std::size_t size, std::size_t align); + [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size); + [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); + [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size); + [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align); void Unmap(GPUVAddr gpu_addr, std::size_t size); private: - PageEntry GetPageEntry(GPUVAddr gpu_addr) const; + [[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const; void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size); GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size); - std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align) const; + [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align) const; void TryLockPage(PageEntry page_entry, std::size_t size); void TryUnlockPage(PageEntry page_entry, std::size_t size); - static constexpr std::size_t PageEntryIndex(GPUVAddr gpu_addr) { + [[nodiscard]] static constexpr std::size_t PageEntryIndex(GPUVAddr gpu_addr) { return (gpu_addr >> page_bits) & page_table_mask; } @@ -141,7 +144,7 @@ private: Core::System& system; - VideoCore::RasterizerInterface& rasterizer; + VideoCore::RasterizerInterface* rasterizer = nullptr; std::vector<PageEntry> page_table; }; diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h index 0d3a88765..fc54ca0ef 100644 --- a/src/video_core/query_cache.h +++ b/src/video_core/query_cache.h @@ -91,14 +91,15 @@ private: std::shared_ptr<HostCounter> last; }; -template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter, - class QueryPool> +template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter> class QueryCacheBase { public: - explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer) - : system{system}, rasterizer{rasterizer}, streams{{CounterStream{ - static_cast<QueryCache&>(*this), - VideoCore::QueryType::SamplesPassed}}} {} + explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_) + : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + gpu_memory{gpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this), + VideoCore::QueryType::SamplesPassed}}} {} void InvalidateRegion(VAddr addr, std::size_t size) { std::unique_lock lock{mutex}; @@ -118,29 +119,27 @@ public: */ void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) { std::unique_lock lock{mutex}; - auto& memory_manager = system.GPU().MemoryManager(); - const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr_opt); - VAddr cpu_addr = *cpu_addr_opt; + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr); - CachedQuery* query = TryGet(cpu_addr); + CachedQuery* query = TryGet(*cpu_addr); if (!query) { - ASSERT_OR_EXECUTE(cpu_addr_opt, return;); - const auto host_ptr = memory_manager.GetPointer(gpu_addr); + ASSERT_OR_EXECUTE(cpu_addr, return;); + u8* const host_ptr = gpu_memory.GetPointer(gpu_addr); - query = Register(type, cpu_addr, host_ptr, timestamp.has_value()); + query = Register(type, *cpu_addr, host_ptr, timestamp.has_value()); } query->BindCounter(Stream(type).Current(), timestamp); if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { - AsyncFlushQuery(cpu_addr); + AsyncFlushQuery(*cpu_addr); } } /// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch. void UpdateCounters() { std::unique_lock lock{mutex}; - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable); } @@ -206,9 +205,6 @@ public: committed_flushes.pop_front(); } -protected: - std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; - private: /// Flushes a memory range to guest memory and removes it from the cache. void FlushAndRemoveRegion(VAddr addr, std::size_t size) { @@ -270,8 +266,9 @@ private: static constexpr std::uintptr_t PAGE_SIZE = 4096; static constexpr unsigned PAGE_BITS = 12; - Core::System& system; VideoCore::RasterizerInterface& rasterizer; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::MemoryManager& gpu_memory; std::recursive_mutex mutex; diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 3cbdac8e7..b3e0919f8 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -106,11 +106,8 @@ public: virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {} /// Initialize disk cached resources for the game being emulated - virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, - const DiskResourceLoadCallback& callback = {}) {} - - /// Initializes renderer dirty flags - virtual void SetupDirtyFlags() {} + virtual void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading, + const DiskResourceLoadCallback& callback) {} /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver. GuestDriverProfile& AccessGuestDriverProfile() { diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index dfb06e87e..a93a1732c 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -9,7 +9,9 @@ namespace VideoCore { -RendererBase::RendererBase(Core::Frontend::EmuWindow& window) : render_window{window} { +RendererBase::RendererBase(Core::Frontend::EmuWindow& window_, + std::unique_ptr<Core::Frontend::GraphicsContext> context_) + : render_window{window_}, context{std::move(context_)} { RefreshBaseSettings(); } diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 1d85219b6..5c650808b 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -15,7 +15,8 @@ namespace Core::Frontend { class EmuWindow; -} +class GraphicsContext; +} // namespace Core::Frontend namespace VideoCore { @@ -25,14 +26,15 @@ struct RendererSettings { // Screenshot std::atomic<bool> screenshot_requested{false}; - void* screenshot_bits; + void* screenshot_bits{}; std::function<void()> screenshot_complete_callback; Layout::FramebufferLayout screenshot_framebuffer_layout; }; class RendererBase : NonCopyable { public: - explicit RendererBase(Core::Frontend::EmuWindow& window); + explicit RendererBase(Core::Frontend::EmuWindow& window, + std::unique_ptr<Core::Frontend::GraphicsContext> context); virtual ~RendererBase(); /// Initialize the renderer @@ -44,11 +46,6 @@ public: /// Finalize rendering the guest frame and draw into the presentation texture virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; - /// Draws the latest frame to the window waiting timeout_ms for a frame to arrive (Renderer - /// specific implementation) - /// Returns true if a frame was drawn - virtual bool TryPresent(int timeout_ms) = 0; - // Getter/setter functions: // ------------------------ @@ -68,6 +65,14 @@ public: return *rasterizer; } + Core::Frontend::GraphicsContext& Context() { + return *context; + } + + const Core::Frontend::GraphicsContext& Context() const { + return *context; + } + Core::Frontend::EmuWindow& GetRenderWindow() { return render_window; } @@ -94,6 +99,7 @@ public: protected: Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle. std::unique_ptr<RasterizerInterface> rasterizer; + std::unique_ptr<Core::Frontend::GraphicsContext> context; f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer int m_current_frame = 0; ///< Current frame, should be set by the renderer diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index e866d8f2f..b1c4cd62f 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -59,9 +59,10 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); } -OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, +OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, const Device& device_, std::size_t stream_size) - : GenericBufferCache{rasterizer, system, + : GenericBufferCache{rasterizer, gpu_memory, cpu_memory, std::make_unique<OGLStreamBuffer>(device_, stream_size, true)}, device{device_} { if (!device.HasFastBufferSubData()) { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 88fdc0536..f75b32e31 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -52,7 +52,8 @@ private: using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>; class OGLBufferCache final : public GenericBufferCache { public: - explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, + explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, const Device& device, std::size_t stream_size); ~OGLBufferCache(); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index e7d95149f..a94e4f72e 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -193,7 +193,6 @@ bool IsASTCSupported() { Device::Device() : max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} { const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR)); - const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER)); const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION)); const std::vector extensions = GetExtensions(); diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index ec5421afa..b532fdcc2 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp @@ -4,16 +4,17 @@ #include "common/assert.h" +#include <glad/glad.h> + #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_fence_manager.h" namespace OpenGL { -GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed) - : VideoCommon::FenceBase(payload, is_stubbed), sync_object{} {} +GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed) : FenceBase(payload, is_stubbed) {} GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed) - : VideoCommon::FenceBase(address, payload, is_stubbed), sync_object{} {} + : FenceBase(address, payload, is_stubbed) {} GLInnerFence::~GLInnerFence() = default; @@ -44,11 +45,10 @@ void GLInnerFence::Wait() { glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED); } -FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system, - VideoCore::RasterizerInterface& rasterizer, +FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache, QueryCache& query_cache) - : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache) {} + : GenericFenceManager{rasterizer, gpu, texture_cache, buffer_cache, query_cache} {} Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { return std::make_shared<GLInnerFence>(value, is_stubbed); diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h index c917b3343..da1dcdace 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.h +++ b/src/video_core/renderer_opengl/gl_fence_manager.h @@ -5,7 +5,6 @@ #pragma once #include <memory> -#include <glad/glad.h> #include "common/common_types.h" #include "video_core/fence_manager.h" @@ -38,9 +37,9 @@ using GenericFenceManager = class FenceManagerOpenGL final : public GenericFenceManager { public: - FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache, - QueryCache& query_cache); + explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, + TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache, + QueryCache& query_cache); protected: Fence CreateFence(u32 value, bool is_stubbed) override; diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index d7ba57aca..1a3d9720e 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -30,12 +30,11 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) { } // Anonymous namespace -QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer) - : VideoCommon::QueryCacheBase< - QueryCache, CachedQuery, CounterStream, HostCounter, - std::vector<OGLQuery>>{system, - static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)}, - gl_rasterizer{gl_rasterizer} {} +QueryCache::QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::MemoryManager& gpu_memory) + : VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter>( + rasterizer, maxwell3d, gpu_memory), + gl_rasterizer{rasterizer} {} QueryCache::~QueryCache() = default; @@ -90,6 +89,8 @@ u64 HostCounter::BlockingQuery() const { CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr) : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {} +CachedQuery::~CachedQuery() = default; + CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {} diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h index d8e7052a1..82cac51ee 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.h +++ b/src/video_core/renderer_opengl/gl_query_cache.h @@ -26,10 +26,11 @@ class RasterizerOpenGL; using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>; -class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, - HostCounter, std::vector<OGLQuery>> { +class QueryCache final + : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> { public: - explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer); + explicit QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::MemoryManager& gpu_memory); ~QueryCache(); OGLQuery AllocateQuery(VideoCore::QueryType type); @@ -40,6 +41,7 @@ public: private: RasterizerOpenGL& gl_rasterizer; + std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> query_pools; }; class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> { @@ -62,10 +64,12 @@ class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> { public: explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr); - CachedQuery(CachedQuery&& rhs) noexcept; - CachedQuery(const CachedQuery&) = delete; + ~CachedQuery() override; + CachedQuery(CachedQuery&& rhs) noexcept; CachedQuery& operator=(CachedQuery&& rhs) noexcept; + + CachedQuery(const CachedQuery&) = delete; CachedQuery& operator=(const CachedQuery&) = delete; void Flush() override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 4af5824cd..bbb2eb17c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -153,16 +153,19 @@ void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t nu } // Anonymous namespace -RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, - const Device& device, ScreenInfo& info, - ProgramManager& program_manager, StateTracker& state_tracker) - : RasterizerAccelerated{system.Memory()}, device{device}, texture_cache{system, *this, device, - state_tracker}, - shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, - buffer_cache{*this, system, device, STREAM_BUFFER_SIZE}, - fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system}, - screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker}, - async_shaders{emu_window} { +RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_, + Core::Memory::Memory& cpu_memory, const Device& device_, + ScreenInfo& screen_info_, ProgramManager& program_manager_, + StateTracker& state_tracker_) + : RasterizerAccelerated{cpu_memory}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()), + kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_), + screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_), + texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker), + shader_cache(*this, emu_window, gpu, maxwell3d, kepler_compute, gpu_memory, device), + query_cache(*this, maxwell3d, gpu_memory), + buffer_cache(*this, gpu_memory, cpu_memory, device, STREAM_BUFFER_SIZE), + fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), + async_shaders(emu_window) { CheckExtensions(); unified_uniform_buffer.Create(); @@ -196,8 +199,7 @@ void RasterizerOpenGL::CheckExtensions() { } void RasterizerOpenGL::SetupVertexFormat() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::VertexFormats]) { return; } @@ -217,7 +219,7 @@ void RasterizerOpenGL::SetupVertexFormat() { } flags[Dirty::VertexFormat0 + index] = false; - const auto attrib = gpu.regs.vertex_attrib_format[index]; + const auto attrib = maxwell3d.regs.vertex_attrib_format[index]; const auto gl_index = static_cast<GLuint>(index); // Disable constant attributes. @@ -241,8 +243,7 @@ void RasterizerOpenGL::SetupVertexFormat() { } void RasterizerOpenGL::SetupVertexBuffer() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::VertexBuffers]) { return; } @@ -253,7 +254,7 @@ void RasterizerOpenGL::SetupVertexBuffer() { const bool use_unified_memory = device.HasVertexBufferUnifiedMemory(); // Upload all guest vertex arrays sequentially to our buffer - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) { if (!flags[Dirty::VertexBuffer0 + index]) { continue; @@ -290,14 +291,13 @@ void RasterizerOpenGL::SetupVertexBuffer() { } void RasterizerOpenGL::SetupVertexInstances() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::VertexInstances]) { return; } flags[Dirty::VertexInstances] = false; - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) { if (!flags[Dirty::VertexInstance0 + index]) { continue; @@ -313,7 +313,7 @@ void RasterizerOpenGL::SetupVertexInstances() { GLintptr RasterizerOpenGL::SetupIndexBuffer() { MICROPROFILE_SCOPE(OpenGL_Index); - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; const std::size_t size = CalculateIndexBufferSize(); const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle); @@ -322,15 +322,14 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { MICROPROFILE_SCOPE(OpenGL_Shader); - auto& gpu = system.GPU().Maxwell3D(); u32 clip_distances = 0; for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - const auto& shader_config = gpu.regs.shader_config[index]; + const auto& shader_config = maxwell3d.regs.shader_config[index]; const auto program{static_cast<Maxwell::ShaderProgram>(index)}; // Skip stages that are not enabled - if (!gpu.regs.IsShaderConfigEnabled(index)) { + if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { switch (program) { case Maxwell::ShaderProgram::Geometry: program_manager.UseGeometryShader(0); @@ -391,11 +390,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { } SyncClipEnabled(clip_distances); - gpu.dirty.flags[Dirty::Shaders] = false; + maxwell3d.dirty.flags[Dirty::Shaders] = false; } std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; std::size_t size = 0; for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { @@ -413,34 +412,27 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { } std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const { - const auto& regs = system.GPU().Maxwell3D().regs; - - return static_cast<std::size_t>(regs.index_array.count) * - static_cast<std::size_t>(regs.index_array.FormatSizeInBytes()); + return static_cast<std::size_t>(maxwell3d.regs.index_array.count) * + static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); } -void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading, +void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { - shader_cache.LoadDiskCache(stop_loading, callback); -} - -void RasterizerOpenGL::SetupDirtyFlags() { - state_tracker.Initialize(); + shader_cache.LoadDiskCache(title_id, stop_loading, callback); } void RasterizerOpenGL::ConfigureFramebuffers() { MICROPROFILE_SCOPE(OpenGL_Framebuffer); - auto& gpu = system.GPU().Maxwell3D(); - if (!gpu.dirty.flags[VideoCommon::Dirty::RenderTargets]) { + if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) { return; } - gpu.dirty.flags[VideoCommon::Dirty::RenderTargets] = false; + maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false; texture_cache.GuardRenderTargets(true); View depth_surface = texture_cache.GetDepthBufferSurface(true); - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); // Bind the framebuffer surfaces @@ -472,8 +464,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() { } void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) { - auto& gpu = system.GPU().Maxwell3D(); - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; texture_cache.GuardRenderTargets(true); View color_surface; @@ -523,12 +514,11 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_de } void RasterizerOpenGL::Clear() { - const auto& gpu = system.GPU().Maxwell3D(); - if (!gpu.ShouldExecute()) { + if (!maxwell3d.ShouldExecute()) { return; } - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; bool use_color{}; bool use_depth{}; bool use_stencil{}; @@ -593,7 +583,6 @@ void RasterizerOpenGL::Clear() { void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { MICROPROFILE_SCOPE(OpenGL_Drawing); - auto& gpu = system.GPU().Maxwell3D(); query_cache.UpdateCounters(); @@ -641,7 +630,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { if (invalidated) { // When the stream buffer has been invalidated, we have to consider vertex buffers as dirty - auto& dirty = gpu.dirty.flags; + auto& dirty = maxwell3d.dirty.flags; dirty[Dirty::VertexBuffers] = true; for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) { dirty[index] = true; @@ -662,7 +651,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { // Setup emulation uniform buffer. if (!device.UseAssemblyShaders()) { MaxwellUniformData ubo; - ubo.SetFromRegs(gpu); + ubo.SetFromRegs(maxwell3d); const auto info = buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset, @@ -671,7 +660,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { // Setup shaders and their used resources. texture_cache.GuardSamplers(true); - const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(gpu.regs.draw.topology); + const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); SetupShaders(primitive_mode); texture_cache.GuardSamplers(false); @@ -688,14 +677,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { BeginTransformFeedback(primitive_mode); - const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); + const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance); const GLsizei num_instances = - static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1); + static_cast<GLsizei>(is_instanced ? maxwell3d.mme_draw.instance_count : 1); if (is_indexed) { - const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base); - const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count); + const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base); + const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count); const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset); - const GLenum format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format); + const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format); if (num_instances == 1 && base_instance == 0 && base_vertex == 0) { glDrawElements(primitive_mode, num_vertices, format, offset); } else if (num_instances == 1 && base_instance == 0) { @@ -714,8 +703,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { base_instance); } } else { - const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first); - const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count); + const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vertex_buffer.first); + const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.vertex_buffer.count); if (num_instances == 1 && base_instance == 0) { glDrawArrays(primitive_mode, base_vertex, num_vertices); } else if (base_instance == 0) { @@ -730,7 +719,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { ++num_queued_commands; - system.GPU().TickWork(); + gpu.TickWork(); } void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { @@ -753,7 +742,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { buffer_cache.Unmap(); - const auto& launch_desc = system.GPU().KeplerCompute().launch_description; + const auto& launch_desc = kepler_compute.launch_description; + program_manager.BindCompute(kernel->GetHandle()); glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); ++num_queued_commands; } @@ -815,17 +805,14 @@ void RasterizerOpenGL::SyncGuestHost() { } void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { - auto& gpu{system.GPU()}; if (!gpu.IsAsync()) { - auto& memory_manager{gpu.MemoryManager()}; - memory_manager.Write<u32>(addr, value); + gpu_memory.Write<u32>(addr, value); return; } fence_manager.SignalSemaphore(addr, value); } void RasterizerOpenGL::SignalSyncPoint(u32 value) { - auto& gpu{system.GPU()}; if (!gpu.IsAsync()) { gpu.IncrementSyncPoint(value); return; @@ -834,7 +821,6 @@ void RasterizerOpenGL::SignalSyncPoint(u32 value) { } void RasterizerOpenGL::ReleaseFences() { - auto& gpu{system.GPU()}; if (!gpu.IsAsync()) { return; } @@ -920,7 +906,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* sh GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV}; MICROPROFILE_SCOPE(OpenGL_UBO); - const auto& stages = system.GPU().Maxwell3D().state.shader_stages; + const auto& stages = maxwell3d.state.shader_stages; const auto& shader_stage = stages[stage_index]; const auto& entries = shader->GetEntries(); const bool use_unified = entries.use_unified_uniforms; @@ -945,7 +931,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* sh void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) { MICROPROFILE_SCOPE(OpenGL_UBO); - const auto& launch_desc = system.GPU().KeplerCompute().launch_description; + const auto& launch_desc = kepler_compute.launch_description; const auto& entries = kernel->GetEntries(); const bool use_unified = entries.use_unified_uniforms; @@ -1018,9 +1004,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV, }; - auto& gpu{system.GPU()}; - auto& memory_manager{gpu.MemoryManager()}; - const auto& cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; + const auto& cbufs{maxwell3d.state.shader_stages[stage_index]}; const auto& entries{shader->GetEntries().global_memory_entries}; std::array<GLuint64EXT, 32> pointers; @@ -1030,8 +1014,8 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer; for (const auto& entry : entries) { const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset}; - const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; - const u32 size{memory_manager.Read<u32>(addr + 8)}; + const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; + const u32 size{gpu_memory.Read<u32>(addr + 8)}; SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); ++binding; } @@ -1041,9 +1025,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh } void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { - auto& gpu{system.GPU()}; - auto& memory_manager{gpu.MemoryManager()}; - const auto& cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; + const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; const auto& entries{kernel->GetEntries().global_memory_entries}; std::array<GLuint64EXT, 32> pointers; @@ -1052,8 +1034,8 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) { u32 binding = 0; for (const auto& entry : entries) { const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset}; - const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)}; - const u32 size{memory_manager.Read<u32>(addr + 8)}; + const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)}; + const u32 size{gpu_memory.Read<u32>(addr + 8)}; SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]); ++binding; } @@ -1077,7 +1059,6 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) { MICROPROFILE_SCOPE(OpenGL_Texture); - const auto& maxwell3d = system.GPU().Maxwell3D(); u32 binding = device.GetBaseBindings(stage_index).sampler; for (const auto& entry : shader->GetEntries().samplers) { const auto shader_type = static_cast<ShaderType>(stage_index); @@ -1090,11 +1071,10 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) { MICROPROFILE_SCOPE(OpenGL_Texture); - const auto& compute = system.GPU().KeplerCompute(); u32 binding = 0; for (const auto& entry : kernel->GetEntries().samplers) { for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i); + const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i); SetupTexture(binding++, texture, entry); } } @@ -1118,20 +1098,18 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu } void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) { - const auto& maxwell3d = system.GPU().Maxwell3D(); u32 binding = device.GetBaseBindings(stage_index).image; for (const auto& entry : shader->GetEntries().images) { - const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); + const auto shader_type = static_cast<ShaderType>(stage_index); const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; SetupImage(binding++, tic, entry); } } void RasterizerOpenGL::SetupComputeImages(Shader* shader) { - const auto& compute = system.GPU().KeplerCompute(); u32 binding = 0; for (const auto& entry : shader->GetEntries().images) { - const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic; + const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic; SetupImage(binding++, tic, entry); } } @@ -1151,9 +1129,8 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t } void RasterizerOpenGL::SyncViewport() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; - const auto& regs = gpu.regs; + auto& flags = maxwell3d.dirty.flags; + const auto& regs = maxwell3d.regs; const bool dirty_viewport = flags[Dirty::Viewports]; const bool dirty_clip_control = flags[Dirty::ClipControl]; @@ -1225,25 +1202,23 @@ void RasterizerOpenGL::SyncViewport() { } void RasterizerOpenGL::SyncDepthClamp() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::DepthClampEnabled]) { return; } flags[Dirty::DepthClampEnabled] = false; - oglEnable(GL_DEPTH_CLAMP, gpu.regs.view_volume_clip_control.depth_clamp_disabled == 0); + oglEnable(GL_DEPTH_CLAMP, maxwell3d.regs.view_volume_clip_control.depth_clamp_disabled == 0); } void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) { return; } flags[Dirty::ClipDistances] = false; - clip_mask &= gpu.regs.clip_distance_enabled; + clip_mask &= maxwell3d.regs.clip_distance_enabled; if (clip_mask == last_clip_distance_mask) { return; } @@ -1259,9 +1234,8 @@ void RasterizerOpenGL::SyncClipCoef() { } void RasterizerOpenGL::SyncCullMode() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; - const auto& regs = gpu.regs; + auto& flags = maxwell3d.dirty.flags; + const auto& regs = maxwell3d.regs; if (flags[Dirty::CullTest]) { flags[Dirty::CullTest] = false; @@ -1276,26 +1250,24 @@ void RasterizerOpenGL::SyncCullMode() { } void RasterizerOpenGL::SyncPrimitiveRestart() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::PrimitiveRestart]) { return; } flags[Dirty::PrimitiveRestart] = false; - if (gpu.regs.primitive_restart.enabled) { + if (maxwell3d.regs.primitive_restart.enabled) { glEnable(GL_PRIMITIVE_RESTART); - glPrimitiveRestartIndex(gpu.regs.primitive_restart.index); + glPrimitiveRestartIndex(maxwell3d.regs.primitive_restart.index); } else { glDisable(GL_PRIMITIVE_RESTART); } } void RasterizerOpenGL::SyncDepthTestState() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; + const auto& regs = maxwell3d.regs; - const auto& regs = gpu.regs; if (flags[Dirty::DepthMask]) { flags[Dirty::DepthMask] = false; glDepthMask(regs.depth_write_enabled ? GL_TRUE : GL_FALSE); @@ -1313,14 +1285,13 @@ void RasterizerOpenGL::SyncDepthTestState() { } void RasterizerOpenGL::SyncStencilTestState() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::StencilTest]) { return; } flags[Dirty::StencilTest] = false; - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; oglEnable(GL_STENCIL_TEST, regs.stencil_enable); glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_func_func), @@ -1345,25 +1316,24 @@ void RasterizerOpenGL::SyncStencilTestState() { } void RasterizerOpenGL::SyncRasterizeEnable() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::RasterizeEnable]) { return; } flags[Dirty::RasterizeEnable] = false; - oglEnable(GL_RASTERIZER_DISCARD, gpu.regs.rasterize_enable == 0); + oglEnable(GL_RASTERIZER_DISCARD, maxwell3d.regs.rasterize_enable == 0); } void RasterizerOpenGL::SyncPolygonModes() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::PolygonModes]) { return; } flags[Dirty::PolygonModes] = false; - if (gpu.regs.fill_rectangle) { + const auto& regs = maxwell3d.regs; + if (regs.fill_rectangle) { if (!GLAD_GL_NV_fill_rectangle) { LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used and not supported"); glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); @@ -1376,27 +1346,26 @@ void RasterizerOpenGL::SyncPolygonModes() { return; } - if (gpu.regs.polygon_mode_front == gpu.regs.polygon_mode_back) { + if (regs.polygon_mode_front == regs.polygon_mode_back) { flags[Dirty::PolygonModeFront] = false; flags[Dirty::PolygonModeBack] = false; - glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front)); + glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_front)); return; } if (flags[Dirty::PolygonModeFront]) { flags[Dirty::PolygonModeFront] = false; - glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front)); + glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(regs.polygon_mode_front)); } if (flags[Dirty::PolygonModeBack]) { flags[Dirty::PolygonModeBack] = false; - glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_back)); + glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_back)); } } void RasterizerOpenGL::SyncColorMask() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::ColorMasks]) { return; } @@ -1405,7 +1374,7 @@ void RasterizerOpenGL::SyncColorMask() { const bool force = flags[Dirty::ColorMaskCommon]; flags[Dirty::ColorMaskCommon] = false; - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; if (regs.color_mask_common) { if (!force && !flags[Dirty::ColorMask0]) { return; @@ -1430,33 +1399,30 @@ void RasterizerOpenGL::SyncColorMask() { } void RasterizerOpenGL::SyncMultiSampleState() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::MultisampleControl]) { return; } flags[Dirty::MultisampleControl] = false; - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; oglEnable(GL_SAMPLE_ALPHA_TO_COVERAGE, regs.multisample_control.alpha_to_coverage); oglEnable(GL_SAMPLE_ALPHA_TO_ONE, regs.multisample_control.alpha_to_one); } void RasterizerOpenGL::SyncFragmentColorClampState() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::FragmentClampColor]) { return; } flags[Dirty::FragmentClampColor] = false; - glClampColor(GL_CLAMP_FRAGMENT_COLOR, gpu.regs.frag_color_clamp ? GL_TRUE : GL_FALSE); + glClampColor(GL_CLAMP_FRAGMENT_COLOR, maxwell3d.regs.frag_color_clamp ? GL_TRUE : GL_FALSE); } void RasterizerOpenGL::SyncBlendState() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; - const auto& regs = gpu.regs; + auto& flags = maxwell3d.dirty.flags; + const auto& regs = maxwell3d.regs; if (flags[Dirty::BlendColor]) { flags[Dirty::BlendColor] = false; @@ -1513,14 +1479,13 @@ void RasterizerOpenGL::SyncBlendState() { } void RasterizerOpenGL::SyncLogicOpState() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::LogicOp]) { return; } flags[Dirty::LogicOp] = false; - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; if (regs.logic_op.enable) { glEnable(GL_COLOR_LOGIC_OP); glLogicOp(MaxwellToGL::LogicOp(regs.logic_op.operation)); @@ -1530,14 +1495,13 @@ void RasterizerOpenGL::SyncLogicOpState() { } void RasterizerOpenGL::SyncScissorTest() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::Scissors]) { return; } flags[Dirty::Scissors] = false; - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) { if (!flags[Dirty::Scissor0 + index]) { continue; @@ -1556,16 +1520,15 @@ void RasterizerOpenGL::SyncScissorTest() { } void RasterizerOpenGL::SyncPointState() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::PointSize]) { return; } flags[Dirty::PointSize] = false; - oglEnable(GL_POINT_SPRITE, gpu.regs.point_sprite_enable); + oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable); - if (gpu.regs.vp_point_size.enable) { + if (maxwell3d.regs.vp_point_size.enable) { // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled. glEnable(GL_PROGRAM_POINT_SIZE); return; @@ -1573,32 +1536,30 @@ void RasterizerOpenGL::SyncPointState() { // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid // in OpenGL). - glPointSize(std::max(1.0f, gpu.regs.point_size)); + glPointSize(std::max(1.0f, maxwell3d.regs.point_size)); glDisable(GL_PROGRAM_POINT_SIZE); } void RasterizerOpenGL::SyncLineState() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::LineWidth]) { return; } flags[Dirty::LineWidth] = false; - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; oglEnable(GL_LINE_SMOOTH, regs.line_smooth_enable); glLineWidth(regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased); } void RasterizerOpenGL::SyncPolygonOffset() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::PolygonOffset]) { return; } flags[Dirty::PolygonOffset] = false; - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; oglEnable(GL_POLYGON_OFFSET_FILL, regs.polygon_offset_fill_enable); oglEnable(GL_POLYGON_OFFSET_LINE, regs.polygon_offset_line_enable); oglEnable(GL_POLYGON_OFFSET_POINT, regs.polygon_offset_point_enable); @@ -1612,14 +1573,13 @@ void RasterizerOpenGL::SyncPolygonOffset() { } void RasterizerOpenGL::SyncAlphaTest() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::AlphaTest]) { return; } flags[Dirty::AlphaTest] = false; - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; if (regs.alpha_test_enabled && regs.rt_control.count > 1) { LOG_WARNING(Render_OpenGL, "Alpha testing with more than one render target is not tested"); } @@ -1633,20 +1593,19 @@ void RasterizerOpenGL::SyncAlphaTest() { } void RasterizerOpenGL::SyncFramebufferSRGB() { - auto& gpu = system.GPU().Maxwell3D(); - auto& flags = gpu.dirty.flags; + auto& flags = maxwell3d.dirty.flags; if (!flags[Dirty::FramebufferSRGB]) { return; } flags[Dirty::FramebufferSRGB] = false; - oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); + oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb); } void RasterizerOpenGL::SyncTransformFeedback() { // TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal // when this is required. - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; static constexpr std::size_t STRIDE = 3; std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs; @@ -1698,7 +1657,7 @@ void RasterizerOpenGL::SyncTransformFeedback() { } void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; if (regs.tfb_enabled == 0) { return; } @@ -1741,7 +1700,7 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { } void RasterizerOpenGL::EndTransformFeedback() { - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; if (regs.tfb_enabled == 0) { return; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index ccc6f50f6..f451404b2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -36,8 +36,8 @@ #include "video_core/shader/async_shaders.h" #include "video_core/textures/texture.h" -namespace Core { -class System; +namespace Core::Memory { +class Memory; } namespace Core::Frontend { @@ -55,9 +55,10 @@ struct DrawParameters; class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { public: - explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, - const Device& device, ScreenInfo& info, - ProgramManager& program_manager, StateTracker& state_tracker); + explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, + Core::Memory::Memory& cpu_memory, const Device& device, + ScreenInfo& screen_info, ProgramManager& program_manager, + StateTracker& state_tracker); ~RasterizerOpenGL() override; void Draw(bool is_indexed, bool is_instanced) override; @@ -83,9 +84,8 @@ public: const Tegra::Engines::Fermi2D::Config& copy_config) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; - void LoadDiskResources(const std::atomic_bool& stop_loading, + void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; - void SetupDirtyFlags() override; /// Returns true when there are commands queued to the OpenGL server. bool AnyCommandQueued() const { @@ -237,7 +237,15 @@ private: void SetupShaders(GLenum primitive_mode); + Tegra::GPU& gpu; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; + Tegra::MemoryManager& gpu_memory; + const Device& device; + ScreenInfo& screen_info; + ProgramManager& program_manager; + StateTracker& state_tracker; TextureCacheOpenGL texture_cache; ShaderCacheOpenGL shader_cache; @@ -247,10 +255,6 @@ private: OGLBufferCache buffer_cache; FenceManagerOpenGL fence_manager; - Core::System& system; - ScreenInfo& screen_info; - ProgramManager& program_manager; - StateTracker& state_tracker; VideoCommon::Shader::AsyncShaders async_shaders; static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index a787e27d2..0ebcec427 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <string_view> #include <utility> #include <glad/glad.h> #include "common/common_types.h" @@ -82,11 +83,13 @@ void OGLSampler::Release() { handle = 0; } -void OGLShader::Create(const char* source, GLenum type) { - if (handle != 0) +void OGLShader::Create(std::string_view source, GLenum type) { + if (handle != 0) { return; - if (source == nullptr) + } + if (source.empty()) { return; + } MICROPROFILE_SCOPE(OpenGL_ResourceCreation); handle = GLShader::LoadShader(source, type); diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index b05cb641c..f48398669 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -4,6 +4,7 @@ #pragma once +#include <string_view> #include <utility> #include <glad/glad.h> #include "common/common_types.h" @@ -127,7 +128,7 @@ public: return *this; } - void Create(const char* source, GLenum type); + void Create(std::string_view source, GLenum type); void Release(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index eb49a36bf..bd56bed0c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -22,6 +22,7 @@ #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_arb_decompiler.h" #include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" @@ -238,12 +239,11 @@ std::unique_ptr<Shader> Shader::CreateStageFromMemory( ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) { const auto shader_type = GetShaderType(program_type); - auto& gpu = params.system.GPU(); + auto& gpu = params.gpu; gpu.ShaderNotify().MarkSharderBuilding(); auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D()); - if (!async_shaders.IsShaderAsync(params.system.GPU()) || - !params.device.UseAsynchronousShaders()) { + if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) { const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); // TODO(Rodrigo): Handle VertexA shaders // std::optional<ShaderIR> ir_b; @@ -286,11 +286,10 @@ std::unique_ptr<Shader> Shader::CreateStageFromMemory( std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { - auto& gpu = params.system.GPU(); + auto& gpu = params.gpu; gpu.ShaderNotify().MarkSharderBuilding(); - auto& engine = gpu.KeplerCompute(); - auto registry = std::make_shared<Registry>(ShaderType::Compute, engine); + auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine); const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); const u64 uid = params.unique_identifier; auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); @@ -319,15 +318,20 @@ std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params, precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program)); } -ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, - Core::Frontend::EmuWindow& emu_window, const Device& device) - : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, - emu_window{emu_window}, device{device}, disk_cache{system} {} +ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, + Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_) + : VideoCommon::ShaderCache<Shader>{rasterizer}, emu_window{emu_window_}, gpu{gpu_}, + gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, device{device_} {} ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; -void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, +void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { + disk_cache.BindTitleID(title_id); const std::optional transferable = disk_cache.LoadTransferable(); if (!transferable) { return; @@ -480,21 +484,19 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram( Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, VideoCommon::Shader::AsyncShaders& async_shaders) { - if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { + if (!maxwell3d.dirty.flags[Dirty::Shaders]) { auto* last_shader = last_shaders[static_cast<std::size_t>(program)]; if (last_shader->IsBuilt()) { return last_shader; } } - auto& memory_manager{system.GPU().MemoryManager()}; - const GPUVAddr address{GetShaderAddress(system, program)}; + const GPUVAddr address{GetShaderAddress(maxwell3d, program)}; if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) { auto completed_work = async_shaders.GetCompletedWork(); for (auto& work : completed_work) { Shader* shader = TryGet(work.cpu_address); - auto& gpu = system.GPU(); gpu.ShaderNotify().MarkShaderComplete(); if (shader == nullptr) { continue; @@ -506,14 +508,13 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, shader->AsyncGLASMBuilt(std::move(work.program.glasm)); } + auto& registry = shader->GetRegistry(); + ShaderDiskCacheEntry entry; entry.type = work.shader_type; entry.code = std::move(work.code); entry.code_b = std::move(work.code_b); entry.unique_identifier = work.uid; - - auto& registry = shader->GetRegistry(); - entry.bound_buffer = registry.GetBoundBuffer(); entry.graphics_info = registry.GetGraphicsInfo(); entry.keys = registry.GetKeys(); @@ -524,28 +525,28 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, } // Look up shader in the cache based on address - const auto cpu_addr{memory_manager.GpuToCpuAddress(address)}; + const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)}; if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) { return last_shaders[static_cast<std::size_t>(program)] = shader; } - const auto host_ptr{memory_manager.GetPointer(address)}; + const u8* const host_ptr{gpu_memory.GetPointer(address)}; // No shader found - create a new one - ProgramCode code{GetShaderCode(memory_manager, address, host_ptr, false)}; + ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)}; ProgramCode code_b; if (program == Maxwell::ShaderProgram::VertexA) { - const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; - const u8* host_ptr_b = memory_manager.GetPointer(address_b); - code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false); + const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)}; + const u8* host_ptr_b = gpu_memory.GetPointer(address_b); + code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false); } const std::size_t code_size = code.size() * sizeof(u64); const u64 unique_identifier = GetUniqueIdentifier( GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); - const ShaderParameters params{system, disk_cache, device, - *cpu_addr, host_ptr, unique_identifier}; + const ShaderParameters params{gpu, maxwell3d, disk_cache, device, + *cpu_addr, host_ptr, unique_identifier}; std::unique_ptr<Shader> shader; const auto found = runtime_cache.find(unique_identifier); @@ -567,21 +568,20 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program, } Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { - auto& memory_manager{system.GPU().MemoryManager()}; - const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)}; + const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)}; if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) { return kernel; } - const auto host_ptr{memory_manager.GetPointer(code_addr)}; // No kernel found, create a new one - ProgramCode code{GetShaderCode(memory_manager, code_addr, host_ptr, true)}; + const u8* host_ptr{gpu_memory.GetPointer(code_addr)}; + ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)}; const std::size_t code_size{code.size() * sizeof(u64)}; const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; - const ShaderParameters params{system, disk_cache, device, - *cpu_addr, host_ptr, unique_identifier}; + const ShaderParameters params{gpu, kepler_compute, disk_cache, device, + *cpu_addr, host_ptr, unique_identifier}; std::unique_ptr<Shader> kernel; const auto found = runtime_cache.find(unique_identifier); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 7528ac686..1708af06a 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -25,8 +25,8 @@ #include "video_core/shader/shader_ir.h" #include "video_core/shader_cache.h" -namespace Core { -class System; +namespace Tegra { +class MemoryManager; } namespace Core::Frontend { @@ -57,11 +57,12 @@ struct PrecompiledShader { }; struct ShaderParameters { - Core::System& system; + Tegra::GPU& gpu; + Tegra::Engines::ConstBufferEngineInterface& engine; ShaderDiskCacheOpenGL& disk_cache; const Device& device; VAddr cpu_addr; - u8* host_ptr; + const u8* host_ptr; u64 unique_identifier; }; @@ -118,12 +119,14 @@ private: class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { public: - explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, - Core::Frontend::EmuWindow& emu_window, const Device& device); + explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::Frontend::EmuWindow& emu_window, + Tegra::GPU& gpu, Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, const Device& device); ~ShaderCacheOpenGL() override; /// Loads disk cache for the current game - void LoadDiskCache(const std::atomic_bool& stop_loading, + void LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback); /// Gets the current specified shader stage program @@ -138,9 +141,13 @@ private: const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, const std::unordered_set<GLenum>& supported_formats); - Core::System& system; Core::Frontend::EmuWindow& emu_window; + Tegra::GPU& gpu; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; const Device& device; + ShaderDiskCacheOpenGL disk_cache; std::unordered_map<u64, PrecompiledShader> runtime_cache; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 3f75fcd2b..bbb8fb095 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -813,7 +813,7 @@ private: const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); const auto it = transform_feedback.find(location); if (it == transform_feedback.end()) { - return {}; + return std::nullopt; } return it->second.components; } @@ -1295,21 +1295,21 @@ private: switch (element) { case 0: UNIMPLEMENTED(); - return {}; + return std::nullopt; case 1: if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { - return {}; + return std::nullopt; } return {{"gl_Layer", Type::Int}}; case 2: if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { - return {}; + return std::nullopt; } return {{"gl_ViewportIndex", Type::Int}}; case 3: return {{"gl_PointSize", Type::Float}}; } - return {}; + return std::nullopt; case Attribute::Index::FrontColor: return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}}; case Attribute::Index::FrontSecondaryColor: @@ -1332,7 +1332,7 @@ private: Type::Float}}; } UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); - return {}; + return std::nullopt; } } @@ -1443,8 +1443,10 @@ private: return expr + ", vec2(0.0), vec2(0.0))"; case TextureType::TextureCube: return expr + ", vec3(0.0), vec3(0.0))"; + default: + UNREACHABLE(); + break; } - UNREACHABLE(); } for (const auto& variant : extras) { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 52fbab3c1..166ee34e1 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -206,28 +206,32 @@ bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const { flat_bindless_samplers.size(); } -ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {} +ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default; ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; +void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) { + title_id = title_id_; +} + std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() { // Skip games without title id - const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0; + const bool has_title_id = title_id != 0; if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) { - return {}; + return std::nullopt; } Common::FS::IOFile file(GetTransferablePath(), "rb"); if (!file.IsOpen()) { LOG_INFO(Render_OpenGL, "No transferable shader cache found"); is_usable = true; - return {}; + return std::nullopt; } u32 version{}; if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it"); - return {}; + return std::nullopt; } if (version < NativeVersion) { @@ -235,12 +239,12 @@ std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTran file.Close(); InvalidateTransferable(); is_usable = true; - return {}; + return std::nullopt; } if (version > NativeVersion) { LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " "of the emulator, skipping"); - return {}; + return std::nullopt; } // Version is valid, load the shaders @@ -249,7 +253,7 @@ std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTran ShaderDiskCacheEntry& entry = entries.emplace_back(); if (!entry.Load(file)) { LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping"); - return {}; + return std::nullopt; } } @@ -290,12 +294,12 @@ std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::Lo ShaderCacheVersionHash file_hash{}; if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) { precompiled_cache_virtual_file_offset = 0; - return {}; + return std::nullopt; } if (GetShaderCacheVersionHash() != file_hash) { LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator"); precompiled_cache_virtual_file_offset = 0; - return {}; + return std::nullopt; } std::vector<ShaderDiskCachePrecompiled> entries; @@ -305,15 +309,16 @@ std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::Lo if (!LoadObjectFromPrecompiled(entry.unique_identifier) || !LoadObjectFromPrecompiled(entry.binary_format) || !LoadObjectFromPrecompiled(binary_size)) { - return {}; + return std::nullopt; } entry.binary.resize(binary_size); if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) { - return {}; + return std::nullopt; } } - return entries; + + return std::move(entries); } void ShaderDiskCacheOpenGL::InvalidateTransferable() { @@ -473,7 +478,7 @@ std::string ShaderDiskCacheOpenGL::GetBaseDir() const { } std::string ShaderDiskCacheOpenGL::GetTitleID() const { - return fmt::format("{:016X}", system.CurrentProcess()->GetTitleID()); + return fmt::format("{:016X}", title_id); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index db2bb73bc..aef841c1d 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -21,10 +21,6 @@ #include "video_core/engines/shader_type.h" #include "video_core/shader/registry.h" -namespace Core { -class System; -} - namespace Common::FS { class IOFile; } @@ -70,9 +66,12 @@ struct ShaderDiskCachePrecompiled { class ShaderDiskCacheOpenGL { public: - explicit ShaderDiskCacheOpenGL(Core::System& system); + explicit ShaderDiskCacheOpenGL(); ~ShaderDiskCacheOpenGL(); + /// Binds a title ID for all future operations. + void BindTitleID(u64 title_id); + /// Loads transferable cache. If file has a old version or on failure, it deletes the file. std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable(); @@ -157,8 +156,6 @@ private: return LoadArrayFromPrecompiled(&object, 1); } - Core::System& system; - // Stores whole precompiled cache which will be read from or saved to the precompiled chache // file FileSys::VectorVfsFile precompiled_cache_virtual_file; @@ -168,8 +165,11 @@ private: // Stored transferable shaders std::unordered_set<u64> stored_transferable; + /// Title ID to operate on + u64 title_id = 0; + // The cache has been loaded at boot - bool is_usable{}; + bool is_usable = false; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 9e74eda0d..4bf0d6090 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <string_view> #include <vector> #include <glad/glad.h> #include "common/assert.h" @@ -11,7 +12,8 @@ namespace OpenGL::GLShader { namespace { -const char* GetStageDebugName(GLenum type) { + +std::string_view StageDebugName(GLenum type) { switch (type) { case GL_VERTEX_SHADER: return "vertex"; @@ -25,12 +27,17 @@ const char* GetStageDebugName(GLenum type) { UNIMPLEMENTED(); return "unknown"; } + } // Anonymous namespace -GLuint LoadShader(const char* source, GLenum type) { - const char* debug_type = GetStageDebugName(type); +GLuint LoadShader(std::string_view source, GLenum type) { + const std::string_view debug_type = StageDebugName(type); const GLuint shader_id = glCreateShader(type); - glShaderSource(shader_id, 1, &source, nullptr); + + const GLchar* source_string = source.data(); + const GLint source_length = static_cast<GLint>(source.size()); + + glShaderSource(shader_id, 1, &source_string, &source_length); LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); glCompileShader(shader_id); diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 03b7548c2..1b770532e 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -38,7 +38,7 @@ void LogShaderSource(T... shaders) { * @param source String of the GLSL shader program * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER) */ -GLuint LoadShader(const char* source, GLenum type); +GLuint LoadShader(std::string_view source, GLenum type); /** * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp index d24fad3de..6bcf831f2 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.cpp +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -214,10 +214,8 @@ void SetupDirtyMisc(Tables& tables) { } // Anonymous namespace -StateTracker::StateTracker(Core::System& system) : system{system} {} - -void StateTracker::Initialize() { - auto& dirty = system.GPU().Maxwell3D().dirty; +StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} { + auto& dirty = gpu.Maxwell3D().dirty; auto& tables = dirty.tables; SetupDirtyRenderTargets(tables); SetupDirtyColorMasks(tables); diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h index 0f823288e..9d127548f 100644 --- a/src/video_core/renderer_opengl/gl_state_tracker.h +++ b/src/video_core/renderer_opengl/gl_state_tracker.h @@ -13,8 +13,8 @@ #include "video_core/dirty_flags.h" #include "video_core/engines/maxwell_3d.h" -namespace Core { -class System; +namespace Tegra { +class GPU; } namespace OpenGL { @@ -90,9 +90,7 @@ static_assert(Last <= std::numeric_limits<u8>::max()); class StateTracker { public: - explicit StateTracker(Core::System& system); - - void Initialize(); + explicit StateTracker(Tegra::GPU& gpu); void BindIndexBuffer(GLuint new_index_buffer) { if (index_buffer == new_index_buffer) { @@ -103,7 +101,6 @@ public: } void NotifyScreenDrawVertexArray() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::VertexFormats] = true; flags[OpenGL::Dirty::VertexFormat0 + 0] = true; flags[OpenGL::Dirty::VertexFormat0 + 1] = true; @@ -117,98 +114,81 @@ public: } void NotifyPolygonModes() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::PolygonModes] = true; flags[OpenGL::Dirty::PolygonModeFront] = true; flags[OpenGL::Dirty::PolygonModeBack] = true; } void NotifyViewport0() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::Viewports] = true; flags[OpenGL::Dirty::Viewport0] = true; } void NotifyScissor0() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::Scissors] = true; flags[OpenGL::Dirty::Scissor0] = true; } void NotifyColorMask0() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::ColorMasks] = true; flags[OpenGL::Dirty::ColorMask0] = true; } void NotifyBlend0() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::BlendStates] = true; flags[OpenGL::Dirty::BlendState0] = true; } void NotifyFramebuffer() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[VideoCommon::Dirty::RenderTargets] = true; } void NotifyFrontFace() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::FrontFace] = true; } void NotifyCullTest() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::CullTest] = true; } void NotifyDepthMask() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::DepthMask] = true; } void NotifyDepthTest() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::DepthTest] = true; } void NotifyStencilTest() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::StencilTest] = true; } void NotifyPolygonOffset() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::PolygonOffset] = true; } void NotifyRasterizeEnable() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::RasterizeEnable] = true; } void NotifyFramebufferSRGB() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::FramebufferSRGB] = true; } void NotifyLogicOp() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::LogicOp] = true; } void NotifyClipControl() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::ClipControl] = true; } void NotifyAlphaTest() { - auto& flags = system.GPU().Maxwell3D().dirty.flags; flags[OpenGL::Dirty::AlphaTest] = true; } private: - Core::System& system; + Tegra::Engines::Maxwell3D::DirtyState::Flags& flags; GLuint index_buffer = 0; }; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 0a7bc9e2b..a863ef218 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -403,7 +403,7 @@ void CachedSurface::DecorateSurfaceName() { LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName()); } -void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) { +void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix) { LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix); } @@ -532,10 +532,12 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const { return texture_view; } -TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, - VideoCore::RasterizerInterface& rasterizer, - const Device& device, StateTracker& state_tracker) - : TextureCacheBase{system, rasterizer, device.HasASTC()}, state_tracker{state_tracker} { +TextureCacheOpenGL::TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer, + Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::MemoryManager& gpu_memory, const Device& device, + StateTracker& state_tracker_) + : TextureCacheBase{rasterizer, maxwell3d, gpu_memory, device.HasASTC()}, state_tracker{ + state_tracker_} { src_framebuffer.Create(); dst_framebuffer.Create(); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index bfc4ddf5d..7787134fc 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -90,7 +90,7 @@ public: Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source); - void DecorateViewName(GPUVAddr gpu_addr, std::string prefix); + void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix); void MarkAsModified(u64 tick) { surface.MarkAsModified(true, tick); @@ -129,8 +129,10 @@ private: class TextureCacheOpenGL final : public TextureCacheBase { public: - explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - const Device& device, StateTracker& state_tracker); + explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer, + Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::MemoryManager& gpu_memory, const Device& device, + StateTracker& state_tracker); ~TextureCacheOpenGL(); protected: diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index fe9bd4b5a..a8be2aa37 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -47,6 +47,8 @@ inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { return GL_UNSIGNED_INT; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_UNSIGNED_INT_2_10_10_10_REV; + default: + break; } break; case Maxwell::VertexAttribute::Type::SignedNorm: @@ -70,6 +72,8 @@ inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { return GL_INT; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return GL_INT_2_10_10_10_REV; + default: + break; } break; case Maxwell::VertexAttribute::Type::Float: @@ -84,6 +88,8 @@ inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { case Maxwell::VertexAttribute::Size::Size_32_32_32: case Maxwell::VertexAttribute::Size::Size_32_32_32_32: return GL_FLOAT; + default: + break; } break; } diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 14bbc3a1c..2ccca1993 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -21,6 +21,8 @@ #include "core/perf_stats.h" #include "core/settings.h" #include "core/telemetry_session.h" +#include "video_core/host_shaders/opengl_present_frag.h" +#include "video_core/host_shaders/opengl_present_vert.h" #include "video_core/morton.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" @@ -30,60 +32,6 @@ namespace OpenGL { namespace { -constexpr std::size_t SWAP_CHAIN_SIZE = 3; - -struct Frame { - u32 width{}; /// Width of the frame (to detect resize) - u32 height{}; /// Height of the frame - bool color_reloaded{}; /// Texture attachment was recreated (ie: resized) - OpenGL::OGLRenderbuffer color{}; /// Buffer shared between the render/present FBO - OpenGL::OGLFramebuffer render{}; /// FBO created on the render thread - OpenGL::OGLFramebuffer present{}; /// FBO created on the present thread - GLsync render_fence{}; /// Fence created on the render thread - GLsync present_fence{}; /// Fence created on the presentation thread - bool is_srgb{}; /// Framebuffer is sRGB or RGB -}; - -constexpr char VERTEX_SHADER[] = R"( -#version 430 core - -out gl_PerVertex { - vec4 gl_Position; -}; - -layout (location = 0) in vec2 vert_position; -layout (location = 1) in vec2 vert_tex_coord; -layout (location = 0) out vec2 frag_tex_coord; - -// This is a truncated 3x3 matrix for 2D transformations: -// The upper-left 2x2 submatrix performs scaling/rotation/mirroring. -// The third column performs translation. -// The third row could be used for projection, which we don't need in 2D. It hence is assumed to -// implicitly be [0, 0, 1] -layout (location = 0) uniform mat3x2 modelview_matrix; - -void main() { - // Multiply input position by the rotscale part of the matrix and then manually translate by - // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector - // to `vec3(vert_position.xy, 1.0)` - gl_Position = vec4(mat2(modelview_matrix) * vert_position + modelview_matrix[2], 0.0, 1.0); - frag_tex_coord = vert_tex_coord; -} -)"; - -constexpr char FRAGMENT_SHADER[] = R"( -#version 430 core - -layout (location = 0) in vec2 frag_tex_coord; -layout (location = 0) out vec4 color; - -layout (binding = 0) uniform sampler2D color_texture; - -void main() { - color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f); -} -)"; - constexpr GLint PositionLocation = 0; constexpr GLint TexCoordLocation = 1; constexpr GLint ModelViewMatrixLocation = 0; @@ -96,24 +44,6 @@ struct ScreenRectVertex { std::array<GLfloat, 2> tex_coord; }; -/// Returns true if any debug tool is attached -bool HasDebugTool() { - const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); - if (nsight) { - return true; - } - - GLint num_extensions; - glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions); - for (GLuint index = 0; index < static_cast<GLuint>(num_extensions); ++index) { - const auto name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, index)); - if (!std::strcmp(name, "GL_EXT_debug_tool")) { - return true; - } - } - return false; -} - /** * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left * corner and (width, height) on the lower-bottom. @@ -197,132 +127,15 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit } // Anonymous namespace -/** - * For smooth Vsync rendering, we want to always present the latest frame that the core generates, - * but also make sure that rendering happens at the pace that the frontend dictates. This is a - * helper class that the renderer uses to sync frames between the render thread and the presentation - * thread - */ -class FrameMailbox { -public: - std::mutex swap_chain_lock; - std::condition_variable present_cv; - std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{}; - std::queue<Frame*> free_queue; - std::deque<Frame*> present_queue; - Frame* previous_frame{}; - - FrameMailbox() { - for (auto& frame : swap_chain) { - free_queue.push(&frame); - } - } - - ~FrameMailbox() { - // lock the mutex and clear out the present and free_queues and notify any people who are - // blocked to prevent deadlock on shutdown - std::scoped_lock lock{swap_chain_lock}; - std::queue<Frame*>().swap(free_queue); - present_queue.clear(); - present_cv.notify_all(); - } - - void ReloadPresentFrame(Frame* frame, u32 height, u32 width) { - frame->present.Release(); - frame->present.Create(); - GLint previous_draw_fbo{}; - glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo); - glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle); - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, - frame->color.handle); - if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { - LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!"); - } - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo); - frame->color_reloaded = false; - } - - void ReloadRenderFrame(Frame* frame, u32 width, u32 height) { - // Recreate the color texture attachment - frame->color.Release(); - frame->color.Create(); - const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8; - glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height); - - // Recreate the FBO for the render target - frame->render.Release(); - frame->render.Create(); - glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle); - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, - frame->color.handle); - if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { - LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!"); - } - - frame->width = width; - frame->height = height; - frame->color_reloaded = true; - } - - Frame* GetRenderFrame() { - std::unique_lock lock{swap_chain_lock}; - - // If theres no free frames, we will reuse the oldest render frame - if (free_queue.empty()) { - auto frame = present_queue.back(); - present_queue.pop_back(); - return frame; - } - - Frame* frame = free_queue.front(); - free_queue.pop(); - return frame; - } - - void ReleaseRenderFrame(Frame* frame) { - std::unique_lock lock{swap_chain_lock}; - present_queue.push_front(frame); - present_cv.notify_one(); - } - - Frame* TryGetPresentFrame(int timeout_ms) { - std::unique_lock lock{swap_chain_lock}; - // wait for new entries in the present_queue - present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), - [&] { return !present_queue.empty(); }); - if (present_queue.empty()) { - // timed out waiting for a frame to draw so return the previous frame - return previous_frame; - } - - // free the previous frame and add it back to the free queue - if (previous_frame) { - free_queue.push(previous_frame); - } - - // the newest entries are pushed to the front of the queue - Frame* frame = present_queue.front(); - present_queue.pop_front(); - // remove all old entries from the present queue and move them back to the free_queue - for (auto f : present_queue) { - free_queue.push(f); - } - present_queue.clear(); - previous_frame = frame; - return frame; - } -}; - -RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, - Core::Frontend::GraphicsContext& context) - : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context}, - program_manager{device}, has_debug_tool{HasDebugTool()} {} +RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, + Core::Frontend::EmuWindow& emu_window_, + Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, + std::unique_ptr<Core::Frontend::GraphicsContext> context) + : RendererBase{emu_window_, std::move(context)}, telemetry_session{telemetry_session_}, + emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {} RendererOpenGL::~RendererOpenGL() = default; -MICROPROFILE_DEFINE(OpenGL_RenderFrame, "OpenGL", "Render Frame", MP_RGB(128, 128, 64)); -MICROPROFILE_DEFINE(OpenGL_WaitPresent, "OpenGL", "Wait For Present", MP_RGB(128, 128, 128)); - void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { if (!framebuffer) { return; @@ -331,79 +144,34 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { PrepareRendertarget(framebuffer); RenderScreenshot(); - Frame* frame; - { - MICROPROFILE_SCOPE(OpenGL_WaitPresent); - - frame = frame_mailbox->GetRenderFrame(); - - // Clean up sync objects before drawing + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + DrawScreen(emu_window.GetFramebufferLayout()); - // INTEL driver workaround. We can't delete the previous render sync object until we are - // sure that the presentation is done - if (frame->present_fence) { - glClientWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED); - } - - // delete the draw fence if the frame wasn't presented - if (frame->render_fence) { - glDeleteSync(frame->render_fence); - frame->render_fence = 0; - } - - // wait for the presentation to be done - if (frame->present_fence) { - glWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED); - glDeleteSync(frame->present_fence); - frame->present_fence = 0; - } - } + ++m_current_frame; - { - MICROPROFILE_SCOPE(OpenGL_RenderFrame); - const auto& layout = render_window.GetFramebufferLayout(); - - // Recreate the frame if the size of the window has changed - if (layout.width != frame->width || layout.height != frame->height || - screen_info.display_srgb != frame->is_srgb) { - LOG_DEBUG(Render_OpenGL, "Reloading render frame"); - frame->is_srgb = screen_info.display_srgb; - frame_mailbox->ReloadRenderFrame(frame, layout.width, layout.height); - } - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, frame->render.handle); - DrawScreen(layout); - // Create a fence for the frontend to wait on and swap this frame to OffTex - frame->render_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - glFlush(); - frame_mailbox->ReleaseRenderFrame(frame); - m_current_frame++; - rasterizer->TickFrame(); - } + rasterizer->TickFrame(); render_window.PollEvents(); - if (has_debug_tool) { - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); - Present(0); - context.SwapBuffers(); - } + context->SwapBuffers(); } void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) { - if (framebuffer) { - // If framebuffer is provided, reload it from memory to a texture - if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) || - screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) || - screen_info.texture.pixel_format != framebuffer->pixel_format || - gl_framebuffer_data.empty()) { - // Reallocate texture if the framebuffer size has changed. - // This is expected to not happen very often and hence should not be a - // performance problem. - ConfigureFramebufferTexture(screen_info.texture, *framebuffer); - } - - // Load the framebuffer from memory, draw it to the screen, and swap buffers - LoadFBToScreenInfo(*framebuffer); + if (!framebuffer) { + return; + } + // If framebuffer is provided, reload it from memory to a texture + if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) || + screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) || + screen_info.texture.pixel_format != framebuffer->pixel_format || + gl_framebuffer_data.empty()) { + // Reallocate texture if the framebuffer size has changed. + // This is expected to not happen very often and hence should not be a + // performance problem. + ConfigureFramebufferTexture(screen_info.texture, *framebuffer); } + + // Load the framebuffer from memory, draw it to the screen, and swap buffers + LoadFBToScreenInfo(*framebuffer); } void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) { @@ -423,7 +191,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)}; const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel}; - u8* const host_ptr{system.Memory().GetPointer(framebuffer_addr)}; + u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)}; rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes); // TODO(Rodrigo): Read this from HLE @@ -453,17 +221,15 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color } void RendererOpenGL::InitOpenGLObjects() { - frame_mailbox = std::make_unique<FrameMailbox>(); - glClearColor(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(), Settings::values.bg_blue.GetValue(), 0.0f); // Create shader programs OGLShader vertex_shader; - vertex_shader.Create(VERTEX_SHADER, GL_VERTEX_SHADER); + vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); OGLShader fragment_shader; - fragment_shader.Create(FRAGMENT_SHADER, GL_FRAGMENT_SHADER); + fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); vertex_program.Create(true, false, vertex_shader.handle); fragment_program.Create(true, false, fragment_shader.handle); @@ -508,7 +274,6 @@ void RendererOpenGL::AddTelemetryFields() { LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor); LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model); - auto& telemetry_session = system.TelemetrySession(); constexpr auto user_system = Common::Telemetry::FieldType::UserSystem; telemetry_session.AddField(user_system, "GPU_Vendor", gpu_vendor); telemetry_session.AddField(user_system, "GPU_Model", gpu_model); @@ -519,8 +284,8 @@ void RendererOpenGL::CreateRasterizer() { if (rasterizer) { return; } - rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, device, screen_info, - program_manager, state_tracker); + rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device, + screen_info, program_manager, state_tracker); } void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, @@ -683,51 +448,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { program_manager.RestoreGuestPipeline(); } -bool RendererOpenGL::TryPresent(int timeout_ms) { - if (has_debug_tool) { - LOG_DEBUG(Render_OpenGL, - "Skipping presentation because we are presenting on the main context"); - return false; - } - return Present(timeout_ms); -} - -bool RendererOpenGL::Present(int timeout_ms) { - const auto& layout = render_window.GetFramebufferLayout(); - auto frame = frame_mailbox->TryGetPresentFrame(timeout_ms); - if (!frame) { - LOG_DEBUG(Render_OpenGL, "TryGetPresentFrame returned no frame to present"); - return false; - } - - // Clearing before a full overwrite of a fbo can signal to drivers that they can avoid a - // readback since we won't be doing any blending - glClear(GL_COLOR_BUFFER_BIT); - - // Recreate the presentation FBO if the color attachment was changed - if (frame->color_reloaded) { - LOG_DEBUG(Render_OpenGL, "Reloading present frame"); - frame_mailbox->ReloadPresentFrame(frame, layout.width, layout.height); - } - glWaitSync(frame->render_fence, 0, GL_TIMEOUT_IGNORED); - // INTEL workaround. - // Normally we could just delete the draw fence here, but due to driver bugs, we can just delete - // it on the emulation thread without too much penalty - // glDeleteSync(frame.render_sync); - // frame.render_sync = 0; - - glBindFramebuffer(GL_READ_FRAMEBUFFER, frame->present.handle); - glBlitFramebuffer(0, 0, frame->width, frame->height, 0, 0, layout.width, layout.height, - GL_COLOR_BUFFER_BIT, GL_LINEAR); - - // Insert fence for the main thread to block on - frame->present_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - glFlush(); - - glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); - return true; -} - void RendererOpenGL::RenderScreenshot() { if (!renderer_settings.screenshot_requested) { return; @@ -742,7 +462,7 @@ void RendererOpenGL::RenderScreenshot() { screenshot_framebuffer.Create(); glBindFramebuffer(GL_FRAMEBUFFER, screenshot_framebuffer.handle); - Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; + const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; GLuint renderbuffer; glGenRenderbuffers(1, &renderbuffer); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 8b18d32e6..9ef181f95 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -16,16 +16,25 @@ namespace Core { class System; -} +class TelemetrySession; +} // namespace Core namespace Core::Frontend { class EmuWindow; } +namespace Core::Memory { +class Memory; +} + namespace Layout { struct FramebufferLayout; } +namespace Tegra { +class GPU; +} + namespace OpenGL { /// Structure used for storing information about the textures for the Switch screen @@ -46,24 +55,17 @@ struct ScreenInfo { TextureInfo texture; }; -struct PresentationTexture { - u32 width = 0; - u32 height = 0; - OGLTexture texture; -}; - -class FrameMailbox; - class RendererOpenGL final : public VideoCore::RendererBase { public: - explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, - Core::Frontend::GraphicsContext& context); + explicit RendererOpenGL(Core::TelemetrySession& telemetry_session, + Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, + Tegra::GPU& gpu, + std::unique_ptr<Core::Frontend::GraphicsContext> context); ~RendererOpenGL() override; bool Init() override; void ShutDown() override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - bool TryPresent(int timeout_ms) override; private: /// Initializes the OpenGL state and creates persistent objects. @@ -91,14 +93,13 @@ private: void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer); - bool Present(int timeout_ms); - + Core::TelemetrySession& telemetry_session; Core::Frontend::EmuWindow& emu_window; - Core::System& system; - Core::Frontend::GraphicsContext& context; - const Device device; + Core::Memory::Memory& cpu_memory; + Tegra::GPU& gpu; - StateTracker state_tracker{system}; + const Device device; + StateTracker state_tracker{gpu}; // OpenGL object IDs OGLBuffer vertex_buffer; @@ -120,13 +121,8 @@ private: std::vector<u8> gl_framebuffer_data; /// Used for transforming the framebuffer orientation - Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; + Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags{}; Common::Rectangle<int> framebuffer_crop_rect; - - /// Frame presentation mailbox - std::unique_ptr<FrameMailbox> frame_mailbox; - - bool has_debug_tool = false; }; } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 81a39a3b8..da5c550ea 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -58,6 +58,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); logic_op.Assign(PackLogicOp(regs.logic_op.operation)); rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0); + topology.Assign(regs.draw.topology); std::memcpy(&point_size, ®s.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast @@ -131,7 +132,6 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size } void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) { - const u32 topology_index = static_cast<u32>(regs.draw.topology.Value()); u32 packed_front_face = PackFrontFace(regs.front_face); if (regs.screen_y_control.triangle_rast_flip != 0) { // Flip front face @@ -161,7 +161,6 @@ void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) { depth_test_enable.Assign(regs.depth_test_enable); front_face.Assign(packed_front_face); depth_test_func.Assign(PackComparisonOp(regs.depth_test_func)); - topology.Assign(topology_index); cull_face.Assign(PackCullFace(regs.cull_face)); cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0); diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index cdcbb65f5..2c18eeaae 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -150,9 +150,8 @@ struct FixedPipelineState { }; union { u32 raw2; - BitField<0, 4, u32> topology; - BitField<4, 2, u32> cull_face; - BitField<6, 1, u32> cull_enable; + BitField<0, 2, u32> cull_face; + BitField<2, 1, u32> cull_enable; }; std::array<VertexBinding, Maxwell::NumVertexArrays> vertex_bindings; @@ -169,10 +168,6 @@ struct FixedPipelineState { Maxwell::FrontFace FrontFace() const noexcept { return UnpackFrontFace(front_face.Value()); } - - constexpr Maxwell::PrimitiveTopology Topology() const noexcept { - return static_cast<Maxwell::PrimitiveTopology>(topology.Value()); - } }; union { @@ -190,6 +185,7 @@ struct FixedPipelineState { BitField<18, 1, u32> logic_op_enable; BitField<19, 4, u32> logic_op; BitField<23, 1, u32> rasterize_enable; + BitField<24, 4, Maxwell::PrimitiveTopology> topology; }; u32 point_size; std::array<u32, Maxwell::NumVertexArrays> binding_divisors; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index f8c77f4fa..d22de1d81 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -78,9 +78,10 @@ VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode w case Tegra::Texture::WrapMode::MirrorOnceBorder: UNIMPLEMENTED(); return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE; + default: + UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode)); + return {}; } - UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode)); - return {}; } VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) { @@ -298,9 +299,10 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; case Maxwell::PrimitiveTopology::Patches: return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST; + default: + UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); + return {}; } - UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology)); - return {}; } VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { @@ -325,6 +327,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R16G16B16A16_UNORM; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return VK_FORMAT_A2B10G10R10_UNORM_PACK32; + default: + break; } break; case Maxwell::VertexAttribute::Type::SignedNorm: @@ -347,6 +351,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R16G16B16A16_SNORM; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return VK_FORMAT_A2B10G10R10_SNORM_PACK32; + default: + break; } break; case Maxwell::VertexAttribute::Type::UnsignedScaled: @@ -369,6 +375,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R16G16B16A16_USCALED; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return VK_FORMAT_A2B10G10R10_USCALED_PACK32; + default: + break; } break; case Maxwell::VertexAttribute::Type::SignedScaled: @@ -391,6 +399,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R16G16B16A16_SSCALED; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return VK_FORMAT_A2B10G10R10_SSCALED_PACK32; + default: + break; } break; case Maxwell::VertexAttribute::Type::UnsignedInt: @@ -421,6 +431,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R32G32B32A32_UINT; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return VK_FORMAT_A2B10G10R10_UINT_PACK32; + default: + break; } break; case Maxwell::VertexAttribute::Type::SignedInt: @@ -451,6 +463,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R32G32B32A32_SINT; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return VK_FORMAT_A2B10G10R10_SINT_PACK32; + default: + break; } break; case Maxwell::VertexAttribute::Type::Float: @@ -471,6 +485,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R32G32B32_SFLOAT; case Maxwell::VertexAttribute::Size::Size_32_32_32_32: return VK_FORMAT_R32G32B32A32_SFLOAT; + default: + break; } break; } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 7ffc90cd0..f2610868e 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -25,9 +25,9 @@ #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_swapchain.h" @@ -56,7 +56,7 @@ VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity, VkDebugUtilsMessageTypeFlagsEXT type, const VkDebugUtilsMessengerCallbackDataEXT* data, [[maybe_unused]] void* user_data) { - const char* message{data->pMessage}; + const char* const message{data->pMessage}; if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) { LOG_CRITICAL(Render_Vulkan, "{}", message); @@ -86,7 +86,7 @@ Common::DynamicLibrary OpenVulkanLibrary() { if (!library.Open(filename.c_str())) { // Android devices may not have libvulkan.so.1, only libvulkan.so. filename = Common::DynamicLibrary::GetVersionedFilename("vulkan"); - library.Open(filename.c_str()); + (void)library.Open(filename.c_str()); } #endif return library; @@ -240,8 +240,12 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext } // Anonymous namespace -RendererVulkan::RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system) - : RendererBase(window), system{system} {} +RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, + Core::Frontend::EmuWindow& emu_window, + Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_, + std::unique_ptr<Core::Frontend::GraphicsContext> context) + : RendererBase{emu_window, std::move(context)}, telemetry_session{telemetry_session_}, + cpu_memory{cpu_memory_}, gpu{gpu_} {} RendererVulkan::~RendererVulkan() { ShutDown(); @@ -268,11 +272,11 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { scheduler->WaitWorker(); swapchain->AcquireNextImage(); - const auto [fence, render_semaphore] = blit_screen->Draw(*framebuffer, use_accelerated); + const VkSemaphore render_semaphore = blit_screen->Draw(*framebuffer, use_accelerated); - scheduler->Flush(false, render_semaphore); + scheduler->Flush(render_semaphore); - if (swapchain->Present(render_semaphore, fence)) { + if (swapchain->Present(render_semaphore)) { blit_screen->Recreate(); } @@ -282,11 +286,6 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { render_window.PollEvents(); } -bool RendererVulkan::TryPresent(int /*timeout_ms*/) { - // TODO (bunnei): ImplementMe - return true; -} - bool RendererVulkan::Init() { library = OpenVulkanLibrary(); std::tie(instance, instance_version) = CreateInstance( @@ -299,23 +298,21 @@ bool RendererVulkan::Init() { memory_manager = std::make_unique<VKMemoryManager>(*device); - resource_manager = std::make_unique<VKResourceManager>(*device); + state_tracker = std::make_unique<StateTracker>(gpu); + + scheduler = std::make_unique<VKScheduler>(*device, *state_tracker); const auto& framebuffer = render_window.GetFramebufferLayout(); - swapchain = std::make_unique<VKSwapchain>(*surface, *device); + swapchain = std::make_unique<VKSwapchain>(*surface, *device, *scheduler); swapchain->Create(framebuffer.width, framebuffer.height, false); - state_tracker = std::make_unique<StateTracker>(system); - - scheduler = std::make_unique<VKScheduler>(*device, *resource_manager, *state_tracker); - - rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device, - *resource_manager, *memory_manager, - *state_tracker, *scheduler); + rasterizer = std::make_unique<RasterizerVulkan>(render_window, gpu, gpu.MemoryManager(), + cpu_memory, screen_info, *device, + *memory_manager, *state_tracker, *scheduler); - blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device, - *resource_manager, *memory_manager, *swapchain, - *scheduler, screen_info); + blit_screen = + std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device, + *memory_manager, *swapchain, *scheduler, screen_info); return true; } @@ -333,7 +330,6 @@ void RendererVulkan::ShutDown() { scheduler.reset(); swapchain.reset(); memory_manager.reset(); - resource_manager.reset(); device.reset(); } @@ -442,8 +438,7 @@ void RendererVulkan::Report() const { LOG_INFO(Render_Vulkan, "Device: {}", model_name); LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version); - auto& telemetry_session = system.TelemetrySession(); - constexpr auto field = Common::Telemetry::FieldType::UserSystem; + static constexpr auto field = Common::Telemetry::FieldType::UserSystem; telemetry_session.AddField(field, "GPU_Vendor", vendor_name); telemetry_session.AddField(field, "GPU_Model", model_name); telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 9617a93e9..1044ca124 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -14,7 +14,15 @@ #include "video_core/renderer_vulkan/wrapper.h" namespace Core { -class System; +class TelemetrySession; +} + +namespace Core::Memory { +class Memory; +} + +namespace Tegra { +class GPU; } namespace Vulkan { @@ -22,9 +30,7 @@ namespace Vulkan { class StateTracker; class VKBlitScreen; class VKDevice; -class VKFence; class VKMemoryManager; -class VKResourceManager; class VKSwapchain; class VKScheduler; class VKImage; @@ -38,13 +44,15 @@ struct VKScreenInfo { class RendererVulkan final : public VideoCore::RendererBase { public: - explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system); + explicit RendererVulkan(Core::TelemetrySession& telemtry_session, + Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory, + Tegra::GPU& gpu, + std::unique_ptr<Core::Frontend::GraphicsContext> context); ~RendererVulkan() override; bool Init() override; void ShutDown() override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - bool TryPresent(int timeout_ms) override; static std::vector<std::string> EnumerateDevices(); @@ -57,7 +65,9 @@ private: void Report() const; - Core::System& system; + Core::TelemetrySession& telemetry_session; + Core::Memory::Memory& cpu_memory; + Tegra::GPU& gpu; Common::DynamicLibrary library; vk::InstanceDispatch dld; @@ -71,11 +81,10 @@ private: vk::DebugCallback debug_callback; std::unique_ptr<VKDevice> device; - std::unique_ptr<VKSwapchain> swapchain; std::unique_ptr<VKMemoryManager> memory_manager; - std::unique_ptr<VKResourceManager> resource_manager; std::unique_ptr<StateTracker> state_tracker; std::unique_ptr<VKScheduler> scheduler; + std::unique_ptr<VKSwapchain> swapchain; std::unique_ptr<VKBlitScreen> blit_screen; }; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index a551e3de8..b5b60309e 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -12,11 +12,9 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/math_util.h" - #include "core/core.h" #include "core/frontend/emu_window.h" #include "core/memory.h" - #include "video_core/gpu.h" #include "video_core/morton.h" #include "video_core/rasterizer_interface.h" @@ -24,8 +22,8 @@ #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_image.h" +#include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_swapchain.h" @@ -210,17 +208,15 @@ struct VKBlitScreen::BufferData { // Unaligned image data goes here }; -VKBlitScreen::VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window, - VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, - VKResourceManager& resource_manager, VKMemoryManager& memory_manager, - VKSwapchain& swapchain, VKScheduler& scheduler, - const VKScreenInfo& screen_info) - : system{system}, render_window{render_window}, rasterizer{rasterizer}, device{device}, - resource_manager{resource_manager}, memory_manager{memory_manager}, swapchain{swapchain}, - scheduler{scheduler}, image_count{swapchain.GetImageCount()}, screen_info{screen_info} { - watches.resize(image_count); - std::generate(watches.begin(), watches.end(), - []() { return std::make_unique<VKFenceWatch>(); }); +VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_, + Core::Frontend::EmuWindow& render_window_, + VideoCore::RasterizerInterface& rasterizer_, const VKDevice& device_, + VKMemoryManager& memory_manager_, VKSwapchain& swapchain_, + VKScheduler& scheduler_, const VKScreenInfo& screen_info_) + : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_}, + device{device_}, memory_manager{memory_manager_}, swapchain{swapchain_}, + scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { + resource_ticks.resize(image_count); CreateStaticResources(); CreateDynamicResources(); @@ -232,15 +228,16 @@ void VKBlitScreen::Recreate() { CreateDynamicResources(); } -std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, - bool use_accelerated) { +VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated) { RefreshResources(framebuffer); // Finish any pending renderpass scheduler.RequestOutsideRenderPassOperationContext(); const std::size_t image_index = swapchain.GetImageIndex(); - watches[image_index]->Watch(scheduler.GetFence()); + + scheduler.Wait(resource_ticks[image_index]); + resource_ticks[image_index] = scheduler.CurrentTick(); VKImage* blit_image = use_accelerated ? screen_info.image : raw_images[image_index].get(); @@ -259,7 +256,7 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon const auto pixel_format = VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; - const auto host_ptr = system.Memory().GetPointer(framebuffer_addr); + const auto host_ptr = cpu_memory.GetPointer(framebuffer_addr); rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer)); // TODO(Rodrigo): Read this from HLE @@ -343,7 +340,7 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon cmdbuf.EndRenderPass(); }); - return {scheduler.GetFence(), *semaphores[image_index]}; + return *semaphores[image_index]; } void VKBlitScreen::CreateStaticResources() { @@ -711,7 +708,7 @@ void VKBlitScreen::CreateFramebuffers() { void VKBlitScreen::ReleaseRawImages() { for (std::size_t i = 0; i < raw_images.size(); ++i) { - watches[i]->Wait(); + scheduler.Wait(resource_ticks.at(i)); } raw_images.clear(); raw_buffer_commits.clear(); diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 243640fab..8f2839214 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -5,16 +5,18 @@ #pragma once #include <memory> -#include <tuple> #include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/wrapper.h" namespace Core { class System; } +namespace Core::Memory { +class Memory; +} + namespace Core::Frontend { class EmuWindow; } @@ -30,26 +32,26 @@ class RasterizerInterface; namespace Vulkan { struct ScreenInfo; + class RasterizerVulkan; class VKDevice; -class VKFence; class VKImage; class VKScheduler; class VKSwapchain; class VKBlitScreen final { public: - explicit VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window, + explicit VKBlitScreen(Core::Memory::Memory& cpu_memory, + Core::Frontend::EmuWindow& render_window, VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, - VKResourceManager& resource_manager, VKMemoryManager& memory_manager, - VKSwapchain& swapchain, VKScheduler& scheduler, - const VKScreenInfo& screen_info); + VKMemoryManager& memory_manager, VKSwapchain& swapchain, + VKScheduler& scheduler, const VKScreenInfo& screen_info); ~VKBlitScreen(); void Recreate(); - std::tuple<VKFence&, VkSemaphore> Draw(const Tegra::FramebufferConfig& framebuffer, - bool use_accelerated); + [[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer, + bool use_accelerated); private: struct BufferData; @@ -81,11 +83,10 @@ private: u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, std::size_t image_index) const; - Core::System& system; + Core::Memory::Memory& cpu_memory; Core::Frontend::EmuWindow& render_window; VideoCore::RasterizerInterface& rasterizer; const VKDevice& device; - VKResourceManager& resource_manager; VKMemoryManager& memory_manager; VKSwapchain& swapchain; VKScheduler& scheduler; @@ -106,7 +107,7 @@ private: vk::Buffer buffer; VKMemoryCommit buffer_commit; - std::vector<std::unique_ptr<VKFenceWatch>> watches; + std::vector<u64> resource_ticks; std::vector<vk::Semaphore> semaphores; std::vector<std::unique_ptr<VKImage>> raw_images; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 1d2f8b557..d9d3da9ea 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -145,14 +145,15 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst }); } -VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, - const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, VKStagingBufferPool& staging_pool) - : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system, - CreateStreamBuffer(device, - scheduler)}, - device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ - staging_pool} {} +VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, + const VKDevice& device_, VKMemoryManager& memory_manager_, + VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_) + : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, gpu_memory, cpu_memory, + CreateStreamBuffer(device_, + scheduler_)}, + device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ + staging_pool_} {} VKBufferCache::~VKBufferCache() = default; diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index 991ee451c..7fb5ceedf 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -13,10 +13,6 @@ #include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/renderer_vulkan/wrapper.h" -namespace Core { -class System; -} - namespace Vulkan { class VKDevice; @@ -53,7 +49,8 @@ private: class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> { public: - explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, + explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, VKStagingBufferPool& staging_pool); ~VKBufferCache(); diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp new file mode 100644 index 000000000..6339f4fe0 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp @@ -0,0 +1,46 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <cstddef> + +#include "video_core/renderer_vulkan/vk_command_pool.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/wrapper.h" + +namespace Vulkan { + +constexpr size_t COMMAND_BUFFER_POOL_SIZE = 0x1000; + +struct CommandPool::Pool { + vk::CommandPool handle; + vk::CommandBuffers cmdbufs; +}; + +CommandPool::CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device) + : ResourcePool(master_semaphore, COMMAND_BUFFER_POOL_SIZE), device{device} {} + +CommandPool::~CommandPool() = default; + +void CommandPool::Allocate(size_t begin, size_t end) { + // Command buffers are going to be commited, recorded, executed every single usage cycle. + // They are also going to be reseted when commited. + Pool& pool = pools.emplace_back(); + pool.handle = device.GetLogical().CreateCommandPool({ + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .pNext = nullptr, + .flags = + VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = device.GetGraphicsFamily(), + }); + pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE); +} + +VkCommandBuffer CommandPool::Commit() { + const size_t index = CommitResource(); + const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE; + const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE; + return pools[pool_index].cmdbufs[sub_index]; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_command_pool.h b/src/video_core/renderer_vulkan/vk_command_pool.h new file mode 100644 index 000000000..b9cb3fb5d --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_command_pool.h @@ -0,0 +1,34 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <cstddef> +#include <vector> + +#include "video_core/renderer_vulkan/vk_resource_pool.h" +#include "video_core/renderer_vulkan/wrapper.h" + +namespace Vulkan { + +class MasterSemaphore; +class VKDevice; + +class CommandPool final : public ResourcePool { +public: + explicit CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device); + ~CommandPool() override; + + void Allocate(size_t begin, size_t end) override; + + VkCommandBuffer Commit(); + +private: + struct Pool; + + const VKDevice& device; + std::vector<Pool> pools; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 182461ed9..9637c6059 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -112,7 +112,8 @@ constexpr u8 quad_array[] = { 0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, - 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; + 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, +}; VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() { return { @@ -218,7 +219,8 @@ constexpr u8 uint8_pass[] = { 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, - 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; + 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, +}; // Quad indexed SPIR-V module. Generated from the "shaders/" directory. constexpr u8 QUAD_INDEXED_SPV[] = { @@ -341,7 +343,8 @@ constexpr u8 QUAD_INDEXED_SPV[] = { 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, - 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; + 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00, +}; std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { return {{ @@ -448,12 +451,12 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto VKComputePass::~VKComputePass() = default; -VkDescriptorSet VKComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue, - VKFence& fence) { +VkDescriptorSet VKComputePass::CommitDescriptorSet( + VKUpdateDescriptorQueue& update_descriptor_queue) { if (!descriptor_template) { return nullptr; } - const auto set = descriptor_allocator->Commit(fence); + const VkDescriptorSet set = descriptor_allocator->Commit(); update_descriptor_queue.Send(*descriptor_template, set); return set; } @@ -477,7 +480,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size); - const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); + const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); @@ -520,13 +523,13 @@ Uint8Pass::~Uint8Pass() = default; std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset) { - const auto staging_size = static_cast<u32>(num_vertices * sizeof(u16)); + const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16)); auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size); - const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); + const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set, @@ -589,7 +592,7 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( update_descriptor_queue.Acquire(); update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size); - const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); + const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set, diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index 230b526bc..acc94f27e 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -15,7 +15,6 @@ namespace Vulkan { class VKDevice; -class VKFence; class VKScheduler; class VKStagingBufferPool; class VKUpdateDescriptorQueue; @@ -30,8 +29,7 @@ public: ~VKComputePass(); protected: - VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue, - VKFence& fence); + VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue); vk::DescriptorUpdateTemplateKHR descriptor_template; vk::PipelineLayout layout; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index ed9d2991c..9be72dc9b 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -32,7 +32,7 @@ VkDescriptorSet VKComputePipeline::CommitDescriptorSet() { if (!descriptor_template) { return {}; } - const auto set = descriptor_allocator.Commit(scheduler.GetFence()); + const VkDescriptorSet set = descriptor_allocator.Commit(); update_descriptor_queue.Send(*descriptor_template, set); return set; } diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index ac4a0884e..f38e089d5 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -7,7 +7,8 @@ #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_resource_pool.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { @@ -15,14 +16,15 @@ namespace Vulkan { // Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines. constexpr std::size_t SETS_GROW_RATE = 0x20; -DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool, - VkDescriptorSetLayout layout) - : VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {} +DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_, + VkDescriptorSetLayout layout_) + : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE), + descriptor_pool{descriptor_pool_}, layout{layout_} {} DescriptorAllocator::~DescriptorAllocator() = default; -VkDescriptorSet DescriptorAllocator::Commit(VKFence& fence) { - const std::size_t index = CommitResource(fence); +VkDescriptorSet DescriptorAllocator::Commit() { + const std::size_t index = CommitResource(); return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE]; } @@ -30,8 +32,9 @@ void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) { descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin)); } -VKDescriptorPool::VKDescriptorPool(const VKDevice& device) - : device{device}, active_pool{AllocateNewPool()} {} +VKDescriptorPool::VKDescriptorPool(const VKDevice& device_, VKScheduler& scheduler) + : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{ + AllocateNewPool()} {} VKDescriptorPool::~VKDescriptorPool() = default; diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h index 9efa66bef..544f32a20 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h @@ -6,21 +6,24 @@ #include <vector> -#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { +class VKDevice; class VKDescriptorPool; +class VKScheduler; -class DescriptorAllocator final : public VKFencedPool { +class DescriptorAllocator final : public ResourcePool { public: explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout); ~DescriptorAllocator() override; + DescriptorAllocator& operator=(const DescriptorAllocator&) = delete; DescriptorAllocator(const DescriptorAllocator&) = delete; - VkDescriptorSet Commit(VKFence& fence); + VkDescriptorSet Commit(); protected: void Allocate(std::size_t begin, std::size_t end) override; @@ -36,15 +39,19 @@ class VKDescriptorPool final { friend DescriptorAllocator; public: - explicit VKDescriptorPool(const VKDevice& device); + explicit VKDescriptorPool(const VKDevice& device, VKScheduler& scheduler); ~VKDescriptorPool(); + VKDescriptorPool(const VKDescriptorPool&) = delete; + VKDescriptorPool& operator=(const VKDescriptorPool&) = delete; + private: vk::DescriptorPool* AllocateNewPool(); vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count); const VKDevice& device; + MasterSemaphore& master_semaphore; std::vector<vk::DescriptorPool> pools; vk::DescriptorPool* active_pool; diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 90916ee0e..e1217ca83 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -45,6 +45,7 @@ constexpr std::array REQUIRED_EXTENSIONS{ VK_KHR_8BIT_STORAGE_EXTENSION_NAME, VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME, + VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME, VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME, VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME, @@ -81,6 +82,21 @@ VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType } } +[[nodiscard]] bool IsRDNA(std::string_view device_name, VkDriverIdKHR driver_id) { + static constexpr std::array RDNA_DEVICES{ + "5700", + "5600", + "5500", + "5300", + }; + if (driver_id != VK_DRIVER_ID_AMD_PROPRIETARY_KHR) { + return false; + } + return std::any_of(RDNA_DEVICES.begin(), RDNA_DEVICES.end(), [device_name](const char* name) { + return device_name.find(name) != std::string_view::npos; + }); +} + std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties( vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) { static constexpr std::array formats{ @@ -253,6 +269,13 @@ bool VKDevice::Create() { .inheritedQueries = false, }; + VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR, + .pNext = nullptr, + .timelineSemaphore = true, + }; + SetNext(next, timeline_semaphore); + VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, .pNext = nullptr, @@ -383,6 +406,15 @@ bool VKDevice::Create() { CollectTelemetryParameters(); + if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) { + // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it + // seems to cause stability issues + LOG_WARNING( + Render_Vulkan, + "Blacklisting AMD proprietary on RDNA devices from VK_EXT_extended_dynamic_state"); + ext_extended_dynamic_state = false; + } + graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index d7f65d435..5babbdd0b 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp @@ -29,8 +29,8 @@ void InnerFence::Queue() { } ASSERT(!event); - event = device.GetLogical().CreateNewEvent(); - ticks = scheduler.Ticks(); + event = device.GetLogical().CreateEvent(); + ticks = scheduler.CurrentTick(); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) { @@ -52,7 +52,7 @@ void InnerFence::Wait() { } ASSERT(event); - if (ticks >= scheduler.Ticks()) { + if (ticks >= scheduler.CurrentTick()) { scheduler.Flush(); } while (!IsEventSignalled()) { @@ -71,12 +71,12 @@ bool InnerFence::IsEventSignalled() const { } } -VKFenceManager::VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - const VKDevice& device, VKScheduler& scheduler, - VKTextureCache& texture_cache, VKBufferCache& buffer_cache, - VKQueryCache& query_cache) - : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache), - device{device}, scheduler{scheduler} {} +VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, + Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache, + VKBufferCache& buffer_cache, VKQueryCache& query_cache, + const VKDevice& device_, VKScheduler& scheduler_) + : GenericFenceManager(rasterizer, gpu, texture_cache, buffer_cache, query_cache), + device{device_}, scheduler{scheduler_} {} Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) { return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed); diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 043fe7947..1547d6d30 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h @@ -55,10 +55,10 @@ using GenericFenceManager = class VKFenceManager final : public GenericFenceManager { public: - explicit VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - const VKDevice& device, VKScheduler& scheduler, - VKTextureCache& texture_cache, VKBufferCache& buffer_cache, - VKQueryCache& query_cache); + explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu, + Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache, + VKBufferCache& buffer_cache, VKQueryCache& query_cache, + const VKDevice& device, VKScheduler& scheduler); protected: Fence CreateFence(u32 value, bool is_stubbed) override; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 2e46c6278..696eaeb5f 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -93,7 +93,7 @@ VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() { if (!descriptor_template) { return {}; } - const auto set = descriptor_allocator.Commit(scheduler.GetFence()); + const VkDescriptorSet set = descriptor_allocator.Commit(); update_descriptor_queue.Send(*descriptor_template, set); return set; } @@ -261,12 +261,12 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa vertex_input_ci.pNext = &input_divisor_ci; } - const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, dynamic.Topology()); + const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology); const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .topology = MaxwellToVK::PrimitiveTopology(device, dynamic.Topology()), + .topology = MaxwellToVK::PrimitiveTopology(device, state.topology), .primitiveRestartEnable = state.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_topology), }; @@ -400,7 +400,6 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa static constexpr std::array extended{ VK_DYNAMIC_STATE_CULL_MODE_EXT, VK_DYNAMIC_STATE_FRONT_FACE_EXT, - VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT, VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT, VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT, VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT, diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp new file mode 100644 index 000000000..ae26e558d --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp @@ -0,0 +1,56 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <atomic> +#include <chrono> + +#include "core/settings.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_master_semaphore.h" +#include "video_core/renderer_vulkan/wrapper.h" + +namespace Vulkan { + +using namespace std::chrono_literals; + +MasterSemaphore::MasterSemaphore(const VKDevice& device) { + static constexpr VkSemaphoreTypeCreateInfoKHR semaphore_type_ci{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR, + .pNext = nullptr, + .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR, + .initialValue = 0, + }; + static constexpr VkSemaphoreCreateInfo semaphore_ci{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, + .pNext = &semaphore_type_ci, + .flags = 0, + }; + semaphore = device.GetLogical().CreateSemaphore(semaphore_ci); + + if (!Settings::values.renderer_debug) { + return; + } + // Validation layers have a bug where they fail to track resource usage when using timeline + // semaphores and synchronizing with GetSemaphoreCounterValueKHR. To workaround this issue, have + // a separate thread waiting for each timeline semaphore value. + debug_thread = std::thread([this] { + u64 counter = 0; + while (!shutdown) { + if (semaphore.Wait(counter, 10'000'000)) { + ++counter; + } + } + }); +} + +MasterSemaphore::~MasterSemaphore() { + shutdown = true; + + // This thread might not be started + if (debug_thread.joinable()) { + debug_thread.join(); + } +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h new file mode 100644 index 000000000..0e93706d7 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h @@ -0,0 +1,70 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <atomic> +#include <thread> + +#include "common/common_types.h" +#include "video_core/renderer_vulkan/wrapper.h" + +namespace Vulkan { + +class VKDevice; + +class MasterSemaphore { +public: + explicit MasterSemaphore(const VKDevice& device); + ~MasterSemaphore(); + + /// Returns the current logical tick. + [[nodiscard]] u64 CurrentTick() const noexcept { + return current_tick; + } + + /// Returns the timeline semaphore handle. + [[nodiscard]] VkSemaphore Handle() const noexcept { + return *semaphore; + } + + /// Returns true when a tick has been hit by the GPU. + [[nodiscard]] bool IsFree(u64 tick) { + return gpu_tick >= tick; + } + + /// Advance to the logical tick. + void NextTick() noexcept { + ++current_tick; + } + + /// Refresh the known GPU tick + void Refresh() { + gpu_tick = semaphore.GetCounter(); + } + + /// Waits for a tick to be hit on the GPU + void Wait(u64 tick) { + // No need to wait if the GPU is ahead of the tick + if (IsFree(tick)) { + return; + } + // Update the GPU tick and try again + Refresh(); + if (IsFree(tick)) { + return; + } + // If none of the above is hit, fallback to a regular wait + semaphore.Wait(tick); + } + +private: + vk::Semaphore semaphore; ///< Timeline semaphore. + std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick. + std::atomic<u64> current_tick{1}; ///< Current logical tick. + std::atomic<bool> shutdown{false}; ///< True when the object is being destroyed. + std::thread debug_thread; ///< Debug thread to workaround validation layer bugs. +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index cfdcdd6ab..dedc9c466 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -135,64 +135,56 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con return std::memcmp(&rhs, this, sizeof *this) == 0; } -Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, - VideoCommon::Shader::ProgramCode program_code, u32 main_offset) - : gpu_addr{gpu_addr}, program_code{std::move(program_code)}, - registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset, - compiler_settings, registry}, - entries{GenerateShaderEntries(shader_ir)} {} +Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine, Tegra::Engines::ShaderType stage, + GPUVAddr gpu_addr_, VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code_, + u32 main_offset) + : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage, engine), + shader_ir(program_code, main_offset, compiler_settings, registry), + entries(GenerateShaderEntries(shader_ir)) {} Shader::~Shader() = default; -Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system, - Tegra::Engines::ShaderType stage) { - if (stage == ShaderType::Compute) { - return system.GPU().KeplerCompute(); - } else { - return system.GPU().Maxwell3D(); - } -} - -VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, - const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, - VKUpdateDescriptorQueue& update_descriptor_queue, - VKRenderPassCache& renderpass_cache) - : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device}, - scheduler{scheduler}, descriptor_pool{descriptor_pool}, - update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {} +VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const VKDevice& device_, + VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_, + VKUpdateDescriptorQueue& update_descriptor_queue_, + VKRenderPassCache& renderpass_cache_) + : VideoCommon::ShaderCache<Shader>{rasterizer}, gpu{gpu_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, + scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, + update_descriptor_queue{update_descriptor_queue_}, renderpass_cache{renderpass_cache_} {} VKPipelineCache::~VKPipelineCache() = default; std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { - const auto& gpu = system.GPU().Maxwell3D(); - std::array<Shader*, Maxwell::MaxShaderProgram> shaders{}; + for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto program{static_cast<Maxwell::ShaderProgram>(index)}; // Skip stages that are not enabled - if (!gpu.regs.IsShaderConfigEnabled(index)) { + if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { continue; } - auto& memory_manager{system.GPU().MemoryManager()}; - const GPUVAddr program_addr{GetShaderAddress(system, program)}; - const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr); + const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)}; + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); ASSERT(cpu_addr); Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); if (!result) { - const auto host_ptr{memory_manager.GetPointer(program_addr)}; + const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)}; // No shader found - create a new one - constexpr u32 stage_offset = STAGE_MAIN_OFFSET; + static constexpr u32 stage_offset = STAGE_MAIN_OFFSET; const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1); - ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false); + ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false); const std::size_t size_in_bytes = code.size() * sizeof(u64); - auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code), - stage_offset); + auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr, + std::move(code), stage_offset); result = shader.get(); if (cpu_addr) { @@ -215,11 +207,11 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( } last_graphics_key = key; - if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(system.GPU())) { + if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) { std::unique_lock lock{pipeline_cache}; const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); if (is_cache_miss) { - system.GPU().ShaderNotify().MarkSharderBuilding(); + gpu.ShaderNotify().MarkSharderBuilding(); LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); const auto [program, bindings] = DecompileShaders(key.fixed_state); async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, @@ -233,13 +225,13 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); auto& entry = pair->second; if (is_cache_miss) { - system.GPU().ShaderNotify().MarkSharderBuilding(); + gpu.ShaderNotify().MarkSharderBuilding(); LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); const auto [program, bindings] = DecompileShaders(key.fixed_state); entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, update_descriptor_queue, renderpass_cache, key, bindings, program); - system.GPU().ShaderNotify().MarkShaderComplete(); + gpu.ShaderNotify().MarkShaderComplete(); } last_graphics_pipeline = entry.get(); return last_graphics_pipeline; @@ -255,22 +247,21 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach } LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - auto& memory_manager = system.GPU().MemoryManager(); - const auto program_addr = key.shader; + const GPUVAddr gpu_addr = key.shader; - const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr); + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); ASSERT(cpu_addr); Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get(); if (!shader) { // No shader found - create a new one - const auto host_ptr = memory_manager.GetPointer(program_addr); + const auto host_ptr = gpu_memory.GetPointer(gpu_addr); - ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true); + ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true); const std::size_t size_in_bytes = code.size() * sizeof(u64); - auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr, - std::move(code), KERNEL_MAIN_OFFSET); + auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr, + *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET); shader = shader_info.get(); if (cpu_addr) { @@ -298,7 +289,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach } void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) { - system.GPU().ShaderNotify().MarkShaderComplete(); + gpu.ShaderNotify().MarkShaderComplete(); std::unique_lock lock{pipeline_cache}; graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); } @@ -339,12 +330,8 @@ void VKPipelineCache::OnShaderRemoval(Shader* shader) { std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { - auto& memory_manager = system.GPU().MemoryManager(); - const auto& gpu = system.GPU().Maxwell3D(); - Specialization specialization; - if (fixed_state.dynamic_state.Topology() == Maxwell::PrimitiveTopology::Points || - device.IsExtExtendedDynamicStateSupported()) { + if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) { float point_size; std::memcpy(&point_size, &fixed_state.point_size, sizeof(float)); specialization.point_size = point_size; @@ -364,12 +351,12 @@ VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { const auto program_enum = static_cast<Maxwell::ShaderProgram>(index); // Skip stages that are not enabled - if (!gpu.regs.IsShaderConfigEnabled(index)) { + if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { continue; } - const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum); - const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); + const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum); + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get(); const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5 diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index c04829e77..e558e6658 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -38,7 +38,6 @@ class RasterizerVulkan; class VKComputePipeline; class VKDescriptorPool; class VKDevice; -class VKFence; class VKScheduler; class VKUpdateDescriptorQueue; @@ -85,7 +84,8 @@ namespace Vulkan { class Shader { public: - explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, + explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine, + Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code, u32 main_offset); ~Shader(); @@ -97,22 +97,19 @@ public: return shader_ir; } - const VideoCommon::Shader::Registry& GetRegistry() const { - return registry; - } - const VideoCommon::Shader::ShaderIR& GetIR() const { return shader_ir; } + const VideoCommon::Shader::Registry& GetRegistry() const { + return registry; + } + const ShaderEntries& GetEntries() const { return entries; } private: - static Tegra::Engines::ConstBufferEngineInterface& GetEngine(Core::System& system, - Tegra::Engines::ShaderType stage); - GPUVAddr gpu_addr{}; VideoCommon::Shader::ProgramCode program_code; VideoCommon::Shader::Registry registry; @@ -122,9 +119,11 @@ private: class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> { public: - explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer, - const VKDevice& device, VKScheduler& scheduler, - VKDescriptorPool& descriptor_pool, + explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, + Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, const VKDevice& device, + VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, VKRenderPassCache& renderpass_cache); ~VKPipelineCache() override; @@ -145,7 +144,11 @@ private: std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( const FixedPipelineState& fixed_state); - Core::System& system; + Tegra::GPU& gpu; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; + Tegra::MemoryManager& gpu_memory; + const VKDevice& device; VKScheduler& scheduler; VKDescriptorPool& descriptor_pool; diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 6cd63d090..ee2d871e3 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -9,35 +9,33 @@ #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_query_cache.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { +using VideoCore::QueryType; + namespace { constexpr std::array QUERY_TARGETS = {VK_QUERY_TYPE_OCCLUSION}; -constexpr VkQueryType GetTarget(VideoCore::QueryType type) { +constexpr VkQueryType GetTarget(QueryType type) { return QUERY_TARGETS[static_cast<std::size_t>(type)]; } } // Anonymous namespace -QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {} +QueryPool::QueryPool(const VKDevice& device_, VKScheduler& scheduler, QueryType type_) + : ResourcePool{scheduler.GetMasterSemaphore(), GROW_STEP}, device{device_}, type{type_} {} QueryPool::~QueryPool() = default; -void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) { - device = &device_; - type = type_; -} - -std::pair<VkQueryPool, u32> QueryPool::Commit(VKFence& fence) { +std::pair<VkQueryPool, u32> QueryPool::Commit() { std::size_t index; do { - index = CommitResource(fence); + index = CommitResource(); } while (usage[index]); usage[index] = true; @@ -47,7 +45,7 @@ std::pair<VkQueryPool, u32> QueryPool::Commit(VKFence& fence) { void QueryPool::Allocate(std::size_t begin, std::size_t end) { usage.resize(end); - pools.push_back(device->GetLogical().CreateQueryPool({ + pools.push_back(device.GetLogical().CreateQueryPool({ .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -68,30 +66,39 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) { usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false; } -VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, +VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, const VKDevice& device, VKScheduler& scheduler) - : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter, - QueryPool>{system, rasterizer}, - device{device}, scheduler{scheduler} { - for (std::size_t i = 0; i < static_cast<std::size_t>(VideoCore::NumQueryTypes); ++i) { - query_pools[i].Initialize(device, static_cast<VideoCore::QueryType>(i)); + : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, + HostCounter>{rasterizer, maxwell3d, gpu_memory}, + device{device}, scheduler{scheduler}, query_pools{ + QueryPool{device, scheduler, + QueryType::SamplesPassed}, + } {} + +VKQueryCache::~VKQueryCache() { + // TODO(Rodrigo): This is a hack to destroy all HostCounter instances before the base class + // destructor is called. The query cache should be redesigned to have a proper ownership model + // instead of using shared pointers. + for (size_t query_type = 0; query_type < VideoCore::NumQueryTypes; ++query_type) { + auto& stream = Stream(static_cast<QueryType>(query_type)); + stream.Update(false); + stream.Reset(); } } -VKQueryCache::~VKQueryCache() = default; - -std::pair<VkQueryPool, u32> VKQueryCache::AllocateQuery(VideoCore::QueryType type) { - return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence()); +std::pair<VkQueryPool, u32> VKQueryCache::AllocateQuery(QueryType type) { + return query_pools[static_cast<std::size_t>(type)].Commit(); } -void VKQueryCache::Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query) { +void VKQueryCache::Reserve(QueryType type, std::pair<VkQueryPool, u32> query) { query_pools[static_cast<std::size_t>(type)].Reserve(query); } HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency, - VideoCore::QueryType type) + QueryType type) : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache}, - type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} { + type{type}, query{cache.AllocateQuery(type)}, tick{cache.Scheduler().CurrentTick()} { const vk::Device* logical = &cache.Device().GetLogical(); cache.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) { logical->ResetQueryPoolEXT(query.first, query.second, 1); @@ -109,7 +116,7 @@ void HostCounter::EndQuery() { } u64 HostCounter::BlockingQuery() const { - if (ticks >= cache.Scheduler().Ticks()) { + if (tick >= cache.Scheduler().CurrentTick()) { cache.Scheduler().Flush(); } u64 data; diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h index 40119e6d3..2e57fb75d 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.h +++ b/src/video_core/renderer_vulkan/vk_query_cache.h @@ -11,7 +11,7 @@ #include "common/common_types.h" #include "video_core/query_cache.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/renderer_vulkan/wrapper.h" namespace VideoCore { @@ -28,14 +28,12 @@ class VKScheduler; using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>; -class QueryPool final : public VKFencedPool { +class QueryPool final : public ResourcePool { public: - explicit QueryPool(); + explicit QueryPool(const VKDevice& device, VKScheduler& scheduler, VideoCore::QueryType type); ~QueryPool() override; - void Initialize(const VKDevice& device, VideoCore::QueryType type); - - std::pair<VkQueryPool, u32> Commit(VKFence& fence); + std::pair<VkQueryPool, u32> Commit(); void Reserve(std::pair<VkQueryPool, u32> query); @@ -45,18 +43,18 @@ protected: private: static constexpr std::size_t GROW_STEP = 512; - const VKDevice* device = nullptr; - VideoCore::QueryType type = {}; + const VKDevice& device; + const VideoCore::QueryType type; std::vector<vk::QueryPool> pools; std::vector<bool> usage; }; class VKQueryCache final - : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter, - QueryPool> { + : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter> { public: - explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, const VKDevice& device, VKScheduler& scheduler); ~VKQueryCache(); @@ -75,6 +73,7 @@ public: private: const VKDevice& device; VKScheduler& scheduler; + std::array<QueryPool, VideoCore::NumQueryTypes> query_pools; }; class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> { @@ -91,7 +90,7 @@ private: VKQueryCache& cache; const VideoCore::QueryType type; const std::pair<VkQueryPool, u32> query; - const u64 ticks; + const u64 tick; }; class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 936f76195..e0fb8693f 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -31,7 +31,6 @@ #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_sampler_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" @@ -381,28 +380,28 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const { } } -RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer, - VKScreenInfo& screen_info, const VKDevice& device, - VKResourceManager& resource_manager, - VKMemoryManager& memory_manager, StateTracker& state_tracker, - VKScheduler& scheduler) - : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer}, - screen_info{screen_info}, device{device}, resource_manager{resource_manager}, - memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler}, - staging_pool(device, memory_manager, scheduler), descriptor_pool(device), - update_descriptor_queue(device, scheduler), renderpass_cache(device), +RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_, + Tegra::MemoryManager& gpu_memory_, + Core::Memory::Memory& cpu_memory, VKScreenInfo& screen_info_, + const VKDevice& device_, VKMemoryManager& memory_manager_, + StateTracker& state_tracker_, VKScheduler& scheduler_) + : RasterizerAccelerated(cpu_memory), gpu(gpu_), gpu_memory(gpu_memory_), + maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), screen_info(screen_info_), + device(device_), memory_manager(memory_manager_), state_tracker(state_tracker_), + scheduler(scheduler_), staging_pool(device, memory_manager, scheduler), + descriptor_pool(device, scheduler_), update_descriptor_queue(device, scheduler), + renderpass_cache(device), quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), - texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, - staging_pool), - pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue, - renderpass_cache), - buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), - sampler_cache(device), - fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache), - query_cache(system, *this, device, scheduler), - wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} { + texture_cache(*this, maxwell3d, gpu_memory, device, memory_manager, scheduler, staging_pool), + pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, + descriptor_pool, update_descriptor_queue, renderpass_cache), + buffer_cache(*this, gpu_memory, cpu_memory, device, memory_manager, scheduler, staging_pool), + sampler_cache(device), query_cache(*this, maxwell3d, gpu_memory, device, scheduler), + fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device, + scheduler), + wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window) { scheduler.SetQueryCache(query_cache); if (device.UseAsynchronousShaders()) { async_shaders.AllocateWorkers(); @@ -414,15 +413,13 @@ RasterizerVulkan::~RasterizerVulkan() = default; void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { MICROPROFILE_SCOPE(Vulkan_Drawing); + SCOPE_EXIT({ gpu.TickWork(); }); FlushWork(); query_cache.UpdateCounters(); - SCOPE_EXIT({ system.GPU().TickWork(); }); - - const auto& gpu = system.GPU().Maxwell3D(); GraphicsPipelineCacheKey key; - key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported()); + key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported()); buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed)); @@ -480,8 +477,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { void RasterizerVulkan::Clear() { MICROPROFILE_SCOPE(Vulkan_Clearing); - const auto& gpu = system.GPU().Maxwell3D(); - if (!system.GPU().Maxwell3D().ShouldExecute()) { + if (!maxwell3d.ShouldExecute()) { return; } @@ -490,7 +486,7 @@ void RasterizerVulkan::Clear() { query_cache.UpdateCounters(); - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || regs.clear_buffers.A; const bool use_depth = regs.clear_buffers.Z; @@ -559,7 +555,7 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { query_cache.UpdateCounters(); - const auto& launch_desc = system.GPU().KeplerCompute().launch_description; + const auto& launch_desc = kepler_compute.launch_description; auto& pipeline = pipeline_cache.GetComputePipeline({ .shader = code_addr, .shared_memory_size = launch_desc.shared_alloc, @@ -655,16 +651,14 @@ void RasterizerVulkan::SyncGuestHost() { } void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { - auto& gpu{system.GPU()}; if (!gpu.IsAsync()) { - gpu.MemoryManager().Write<u32>(addr, value); + gpu_memory.Write<u32>(addr, value); return; } fence_manager.SignalSemaphore(addr, value); } void RasterizerVulkan::SignalSyncPoint(u32 value) { - auto& gpu{system.GPU()}; if (!gpu.IsAsync()) { gpu.IncrementSyncPoint(value); return; @@ -673,7 +667,6 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) { } void RasterizerVulkan::ReleaseFences() { - auto& gpu{system.GPU()}; if (!gpu.IsAsync()) { return; } @@ -751,10 +744,6 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } -void RasterizerVulkan::SetupDirtyFlags() { - state_tracker.Initialize(); -} - void RasterizerVulkan::FlushWork() { static constexpr u32 DRAWS_TO_DISPATCH = 4096; @@ -778,10 +767,9 @@ void RasterizerVulkan::FlushWork() { RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) { MICROPROFILE_SCOPE(Vulkan_RenderTargets); - auto& maxwell3d = system.GPU().Maxwell3D(); - auto& dirty = maxwell3d.dirty.flags; - auto& regs = maxwell3d.regs; + const auto& regs = maxwell3d.regs; + auto& dirty = maxwell3d.dirty.flags; const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets]; dirty[VideoCommon::Dirty::RenderTargets] = false; @@ -844,7 +832,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers( return true; }; - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count); for (std::size_t index = 0; index < num_attachments; ++index) { if (try_push(color_attachments[index])) { @@ -880,13 +868,12 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt bool is_instanced) { MICROPROFILE_SCOPE(Vulkan_Geometry); - const auto& gpu = system.GPU().Maxwell3D(); - const auto& regs = gpu.regs; + const auto& regs = maxwell3d.regs; SetupVertexArrays(buffer_bindings); const u32 base_instance = regs.vb_base_instance; - const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1; + const u32 num_instances = is_instanced ? maxwell3d.mme_draw.instance_count : 1; const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first; const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count; @@ -947,7 +934,7 @@ void RasterizerVulkan::SetupImageTransitions( } void RasterizerVulkan::UpdateDynamicStates() { - auto& regs = system.GPU().Maxwell3D().regs; + auto& regs = maxwell3d.regs; UpdateViewportsState(regs); UpdateScissorsState(regs); UpdateDepthBias(regs); @@ -961,14 +948,13 @@ void RasterizerVulkan::UpdateDynamicStates() { UpdateDepthWriteEnable(regs); UpdateDepthCompareOp(regs); UpdateFrontFace(regs); - UpdatePrimitiveTopology(regs); UpdateStencilOp(regs); UpdateStencilTestEnable(regs); } } void RasterizerVulkan::BeginTransformFeedback() { - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; if (regs.tfb_enabled == 0) { return; } @@ -1000,7 +986,7 @@ void RasterizerVulkan::BeginTransformFeedback() { } void RasterizerVulkan::EndTransformFeedback() { - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; if (regs.tfb_enabled == 0) { return; } @@ -1013,7 +999,7 @@ void RasterizerVulkan::EndTransformFeedback() { } void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) { - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { const auto& vertex_array = regs.vertex_array[index]; @@ -1039,7 +1025,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar if (params.num_vertices == 0) { return; } - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; switch (regs.draw.topology) { case Maxwell::PrimitiveTopology::Quads: { if (!params.is_indexed) { @@ -1087,8 +1073,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) { MICROPROFILE_SCOPE(Vulkan_ConstBuffers); - const auto& gpu = system.GPU().Maxwell3D(); - const auto& shader_stage = gpu.state.shader_stages[stage]; + const auto& shader_stage = maxwell3d.state.shader_stages[stage]; for (const auto& entry : entries.const_buffers) { SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]); } @@ -1096,8 +1081,7 @@ void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, s void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) { MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); - auto& gpu{system.GPU()}; - const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage]}; + const auto& cbufs{maxwell3d.state.shader_stages[stage]}; for (const auto& entry : entries.global_buffers) { const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset(); @@ -1107,19 +1091,17 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) { MICROPROFILE_SCOPE(Vulkan_Textures); - const auto& gpu = system.GPU().Maxwell3D(); for (const auto& entry : entries.uniform_texels) { - const auto image = GetTextureInfo(gpu, entry, stage).tic; + const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; SetupUniformTexels(image, entry); } } void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) { MICROPROFILE_SCOPE(Vulkan_Textures); - const auto& gpu = system.GPU().Maxwell3D(); for (const auto& entry : entries.samplers) { for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(gpu, entry, stage, i); + const auto texture = GetTextureInfo(maxwell3d, entry, stage, i); SetupTexture(texture, entry); } } @@ -1127,25 +1109,23 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std:: void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) { MICROPROFILE_SCOPE(Vulkan_Textures); - const auto& gpu = system.GPU().Maxwell3D(); for (const auto& entry : entries.storage_texels) { - const auto image = GetTextureInfo(gpu, entry, stage).tic; + const auto image = GetTextureInfo(maxwell3d, entry, stage).tic; SetupStorageTexel(image, entry); } } void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { MICROPROFILE_SCOPE(Vulkan_Images); - const auto& gpu = system.GPU().Maxwell3D(); for (const auto& entry : entries.images) { - const auto tic = GetTextureInfo(gpu, entry, stage).tic; + const auto tic = GetTextureInfo(maxwell3d, entry, stage).tic; SetupImage(tic, entry); } } void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_ConstBuffers); - const auto& launch_desc = system.GPU().KeplerCompute().launch_description; + const auto& launch_desc = kepler_compute.launch_description; for (const auto& entry : entries.const_buffers) { const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); @@ -1159,7 +1139,7 @@ void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); - const auto cbufs{system.GPU().KeplerCompute().launch_description.const_buffer_config}; + const auto& cbufs{kepler_compute.launch_description.const_buffer_config}; for (const auto& entry : entries.global_buffers) { const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; SetupGlobalBuffer(entry, addr); @@ -1168,19 +1148,17 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); - const auto& gpu = system.GPU().KeplerCompute(); for (const auto& entry : entries.uniform_texels) { - const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; + const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; SetupUniformTexels(image, entry); } } void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); - const auto& gpu = system.GPU().KeplerCompute(); for (const auto& entry : entries.samplers) { for (std::size_t i = 0; i < entry.size; ++i) { - const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex, i); + const auto texture = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex, i); SetupTexture(texture, entry); } } @@ -1188,18 +1166,16 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Textures); - const auto& gpu = system.GPU().KeplerCompute(); for (const auto& entry : entries.storage_texels) { - const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; + const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; SetupStorageTexel(image, entry); } } void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { MICROPROFILE_SCOPE(Vulkan_Images); - const auto& gpu = system.GPU().KeplerCompute(); for (const auto& entry : entries.images) { - const auto tic = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; + const auto tic = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic; SetupImage(tic, entry); } } @@ -1223,9 +1199,8 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, } void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { - auto& memory_manager{system.GPU().MemoryManager()}; - const auto actual_addr = memory_manager.Read<u64>(address); - const auto size = memory_manager.Read<u32>(address + 8); + const u64 actual_addr = gpu_memory.Read<u64>(address); + const u32 size = gpu_memory.Read<u32>(address + 8); if (size == 0) { // Sometimes global memory pointers don't have a proper size. Upload a dummy entry @@ -1442,16 +1417,6 @@ void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) { [front_face](vk::CommandBuffer cmdbuf) { cmdbuf.SetFrontFaceEXT(front_face); }); } -void RasterizerVulkan::UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs) { - if (!state_tracker.TouchPrimitiveTopology()) { - return; - } - const Maxwell::PrimitiveTopology primitive_topology = regs.draw.topology.Value(); - scheduler.Record([this, primitive_topology](vk::CommandBuffer cmdbuf) { - cmdbuf.SetPrimitiveTopologyEXT(MaxwellToVK::PrimitiveTopology(device, primitive_topology)); - }); -} - void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) { if (!state_tracker.TouchStencilOp()) { return; @@ -1508,7 +1473,7 @@ std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { } std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; std::size_t size = 0; for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { @@ -1523,9 +1488,8 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { } std::size_t RasterizerVulkan::CalculateIndexBufferSize() const { - const auto& regs = system.GPU().Maxwell3D().regs; - return static_cast<std::size_t>(regs.index_array.count) * - static_cast<std::size_t>(regs.index_array.FormatSizeInBytes()); + return static_cast<std::size_t>(maxwell3d.regs.index_array.count) * + static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes()); } std::size_t RasterizerVulkan::CalculateConstBufferSize( @@ -1540,7 +1504,7 @@ std::size_t RasterizerVulkan::CalculateConstBufferSize( } RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const { - const auto& regs = system.GPU().Maxwell3D().regs; + const auto& regs = maxwell3d.regs; const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count); RenderPassParams params; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index f640ba649..237e51fa4 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -25,7 +25,6 @@ #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_sampler_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" @@ -106,10 +105,11 @@ struct ImageView { class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { public: - explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, + explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, + Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory, VKScreenInfo& screen_info, const VKDevice& device, - VKResourceManager& resource_manager, VKMemoryManager& memory_manager, - StateTracker& state_tracker, VKScheduler& scheduler); + VKMemoryManager& memory_manager, StateTracker& state_tracker, + VKScheduler& scheduler); ~RasterizerVulkan() override; void Draw(bool is_indexed, bool is_instanced) override; @@ -135,7 +135,6 @@ public: const Tegra::Engines::Fermi2D::Config& copy_config) override; bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, u32 pixel_stride) override; - void SetupDirtyFlags() override; VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { return async_shaders; @@ -260,7 +259,6 @@ private: void UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs); - void UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs); void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs); @@ -279,11 +277,13 @@ private: VkBuffer DefaultBuffer(); - Core::System& system; - Core::Frontend::EmuWindow& render_window; + Tegra::GPU& gpu; + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; + VKScreenInfo& screen_info; const VKDevice& device; - VKResourceManager& resource_manager; VKMemoryManager& memory_manager; StateTracker& state_tracker; VKScheduler& scheduler; @@ -300,8 +300,8 @@ private: VKPipelineCache pipeline_cache; VKBufferCache buffer_cache; VKSamplerCache sampler_cache; - VKFenceManager fence_manager; VKQueryCache query_cache; + VKFenceManager fence_manager; vk::Buffer default_buffer; VKMemoryCommit default_buffer_commit; diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp deleted file mode 100644 index f19330a36..000000000 --- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp +++ /dev/null @@ -1,311 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <optional> -#include "common/assert.h" -#include "common/logging/log.h" -#include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" -#include "video_core/renderer_vulkan/wrapper.h" - -namespace Vulkan { - -namespace { - -// TODO(Rodrigo): Fine tune these numbers. -constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000; -constexpr std::size_t FENCES_GROW_STEP = 0x40; - -constexpr VkFenceCreateInfo BuildFenceCreateInfo() { - return { - .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - }; -} - -} // Anonymous namespace - -class CommandBufferPool final : public VKFencedPool { -public: - explicit CommandBufferPool(const VKDevice& device) - : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {} - - void Allocate(std::size_t begin, std::size_t end) override { - // Command buffers are going to be commited, recorded, executed every single usage cycle. - // They are also going to be reseted when commited. - Pool& pool = pools.emplace_back(); - pool.handle = device.GetLogical().CreateCommandPool({ - .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, - .pNext = nullptr, - .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | - VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, - .queueFamilyIndex = device.GetGraphicsFamily(), - }); - pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE); - } - - VkCommandBuffer Commit(VKFence& fence) { - const std::size_t index = CommitResource(fence); - const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE; - const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE; - return pools[pool_index].cmdbufs[sub_index]; - } - -private: - struct Pool { - vk::CommandPool handle; - vk::CommandBuffers cmdbufs; - }; - - const VKDevice& device; - std::vector<Pool> pools; -}; - -VKResource::VKResource() = default; - -VKResource::~VKResource() = default; - -VKFence::VKFence(const VKDevice& device) - : device{device}, handle{device.GetLogical().CreateFence(BuildFenceCreateInfo())} {} - -VKFence::~VKFence() = default; - -void VKFence::Wait() { - switch (const VkResult result = handle.Wait()) { - case VK_SUCCESS: - return; - case VK_ERROR_DEVICE_LOST: - device.ReportLoss(); - [[fallthrough]]; - default: - throw vk::Exception(result); - } -} - -void VKFence::Release() { - ASSERT(is_owned); - is_owned = false; -} - -void VKFence::Commit() { - is_owned = true; - is_used = true; -} - -bool VKFence::Tick(bool gpu_wait, bool owner_wait) { - if (!is_used) { - // If a fence is not used it's always free. - return true; - } - if (is_owned && !owner_wait) { - // The fence is still being owned (Release has not been called) and ownership wait has - // not been asked. - return false; - } - - if (gpu_wait) { - // Wait for the fence if it has been requested. - (void)handle.Wait(); - } else { - if (handle.GetStatus() != VK_SUCCESS) { - // Vulkan fence is not ready, not much it can do here - return false; - } - } - - // Broadcast resources their free state. - for (auto* resource : protected_resources) { - resource->OnFenceRemoval(this); - } - protected_resources.clear(); - - // Prepare fence for reusage. - handle.Reset(); - is_used = false; - return true; -} - -void VKFence::Protect(VKResource* resource) { - protected_resources.push_back(resource); -} - -void VKFence::Unprotect(VKResource* resource) { - const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource); - ASSERT(it != protected_resources.end()); - - resource->OnFenceRemoval(this); - protected_resources.erase(it); -} - -void VKFence::RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept { - std::replace(std::begin(protected_resources), std::end(protected_resources), old_resource, - new_resource); -} - -VKFenceWatch::VKFenceWatch() = default; - -VKFenceWatch::VKFenceWatch(VKFence& initial_fence) { - Watch(initial_fence); -} - -VKFenceWatch::VKFenceWatch(VKFenceWatch&& rhs) noexcept { - fence = std::exchange(rhs.fence, nullptr); - if (fence) { - fence->RedirectProtection(&rhs, this); - } -} - -VKFenceWatch& VKFenceWatch::operator=(VKFenceWatch&& rhs) noexcept { - fence = std::exchange(rhs.fence, nullptr); - if (fence) { - fence->RedirectProtection(&rhs, this); - } - return *this; -} - -VKFenceWatch::~VKFenceWatch() { - if (fence) { - fence->Unprotect(this); - } -} - -void VKFenceWatch::Wait() { - if (fence == nullptr) { - return; - } - fence->Wait(); - fence->Unprotect(this); -} - -void VKFenceWatch::Watch(VKFence& new_fence) { - Wait(); - fence = &new_fence; - fence->Protect(this); -} - -bool VKFenceWatch::TryWatch(VKFence& new_fence) { - if (fence) { - return false; - } - fence = &new_fence; - fence->Protect(this); - return true; -} - -void VKFenceWatch::OnFenceRemoval(VKFence* signaling_fence) { - ASSERT_MSG(signaling_fence == fence, "Removing the wrong fence"); - fence = nullptr; -} - -VKFencedPool::VKFencedPool(std::size_t grow_step) : grow_step{grow_step} {} - -VKFencedPool::~VKFencedPool() = default; - -std::size_t VKFencedPool::CommitResource(VKFence& fence) { - const auto Search = [&](std::size_t begin, std::size_t end) -> std::optional<std::size_t> { - for (std::size_t iterator = begin; iterator < end; ++iterator) { - if (watches[iterator]->TryWatch(fence)) { - // The resource is now being watched, a free resource was successfully found. - return iterator; - } - } - return {}; - }; - // Try to find a free resource from the hinted position to the end. - auto found = Search(free_iterator, watches.size()); - if (!found) { - // Search from beginning to the hinted position. - found = Search(0, free_iterator); - if (!found) { - // Both searches failed, the pool is full; handle it. - const std::size_t free_resource = ManageOverflow(); - - // Watch will wait for the resource to be free. - watches[free_resource]->Watch(fence); - found = free_resource; - } - } - // Free iterator is hinted to the resource after the one that's been commited. - free_iterator = (*found + 1) % watches.size(); - return *found; -} - -std::size_t VKFencedPool::ManageOverflow() { - const std::size_t old_capacity = watches.size(); - Grow(); - - // The last entry is guaranted to be free, since it's the first element of the freshly - // allocated resources. - return old_capacity; -} - -void VKFencedPool::Grow() { - const std::size_t old_capacity = watches.size(); - watches.resize(old_capacity + grow_step); - std::generate(watches.begin() + old_capacity, watches.end(), - []() { return std::make_unique<VKFenceWatch>(); }); - Allocate(old_capacity, old_capacity + grow_step); -} - -VKResourceManager::VKResourceManager(const VKDevice& device) : device{device} { - GrowFences(FENCES_GROW_STEP); - command_buffer_pool = std::make_unique<CommandBufferPool>(device); -} - -VKResourceManager::~VKResourceManager() = default; - -VKFence& VKResourceManager::CommitFence() { - const auto StepFences = [&](bool gpu_wait, bool owner_wait) -> VKFence* { - const auto Tick = [=](auto& fence) { return fence->Tick(gpu_wait, owner_wait); }; - const auto hinted = fences.begin() + fences_iterator; - - auto it = std::find_if(hinted, fences.end(), Tick); - if (it == fences.end()) { - it = std::find_if(fences.begin(), hinted, Tick); - if (it == hinted) { - return nullptr; - } - } - fences_iterator = std::distance(fences.begin(), it) + 1; - if (fences_iterator >= fences.size()) - fences_iterator = 0; - - auto& fence = *it; - fence->Commit(); - return fence.get(); - }; - - VKFence* found_fence = StepFences(false, false); - if (!found_fence) { - // Try again, this time waiting. - found_fence = StepFences(true, false); - - if (!found_fence) { - // Allocate new fences and try again. - LOG_INFO(Render_Vulkan, "Allocating new fences {} -> {}", fences.size(), - fences.size() + FENCES_GROW_STEP); - - GrowFences(FENCES_GROW_STEP); - found_fence = StepFences(true, false); - ASSERT(found_fence != nullptr); - } - } - return *found_fence; -} - -VkCommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) { - return command_buffer_pool->Commit(fence); -} - -void VKResourceManager::GrowFences(std::size_t new_fences_count) { - const std::size_t previous_size = fences.size(); - fences.resize(previous_size + new_fences_count); - - std::generate(fences.begin() + previous_size, fences.end(), - [this] { return std::make_unique<VKFence>(device); }); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h deleted file mode 100644 index f683d2276..000000000 --- a/src/video_core/renderer_vulkan/vk_resource_manager.h +++ /dev/null @@ -1,196 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <cstddef> -#include <memory> -#include <vector> -#include "video_core/renderer_vulkan/wrapper.h" - -namespace Vulkan { - -class VKDevice; -class VKFence; -class VKResourceManager; - -class CommandBufferPool; - -/// Interface for a Vulkan resource -class VKResource { -public: - explicit VKResource(); - virtual ~VKResource(); - - /** - * Signals the object that an owning fence has been signaled. - * @param signaling_fence Fence that signals its usage end. - */ - virtual void OnFenceRemoval(VKFence* signaling_fence) = 0; -}; - -/** - * Fences take ownership of objects, protecting them from GPU-side or driver-side concurrent access. - * They must be commited from the resource manager. Their usage flow is: commit the fence from the - * resource manager, protect resources with it and use them, send the fence to an execution queue - * and Wait for it if needed and then call Release. Used resources will automatically be signaled - * when they are free to be reused. - * @brief Protects resources for concurrent usage and signals its release. - */ -class VKFence { - friend class VKResourceManager; - -public: - explicit VKFence(const VKDevice& device); - ~VKFence(); - - /** - * Waits for the fence to be signaled. - * @warning You must have ownership of the fence and it has to be previously sent to a queue to - * call this function. - */ - void Wait(); - - /** - * Releases ownership of the fence. Pass after it has been sent to an execution queue. - * Unmanaged usage of the fence after the call will result in undefined behavior because it may - * be being used for something else. - */ - void Release(); - - /// Protects a resource with this fence. - void Protect(VKResource* resource); - - /// Removes protection for a resource. - void Unprotect(VKResource* resource); - - /// Redirects one protected resource to a new address. - void RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept; - - /// Retreives the fence. - operator VkFence() const { - return *handle; - } - -private: - /// Take ownership of the fence. - void Commit(); - - /** - * Updates the fence status. - * @warning Waiting for the owner might soft lock the execution. - * @param gpu_wait Wait for the fence to be signaled by the driver. - * @param owner_wait Wait for the owner to signal its freedom. - * @returns True if the fence is free. Waiting for gpu and owner will always return true. - */ - bool Tick(bool gpu_wait, bool owner_wait); - - const VKDevice& device; ///< Device handler - vk::Fence handle; ///< Vulkan fence - std::vector<VKResource*> protected_resources; ///< List of resources protected by this fence - bool is_owned = false; ///< The fence has been commited but not released yet. - bool is_used = false; ///< The fence has been commited but it has not been checked to be free. -}; - -/** - * A fence watch is used to keep track of the usage of a fence and protect a resource or set of - * resources without having to inherit VKResource from their handlers. - */ -class VKFenceWatch final : public VKResource { -public: - explicit VKFenceWatch(); - VKFenceWatch(VKFence& initial_fence); - VKFenceWatch(VKFenceWatch&&) noexcept; - VKFenceWatch(const VKFenceWatch&) = delete; - ~VKFenceWatch() override; - - VKFenceWatch& operator=(VKFenceWatch&&) noexcept; - - /// Waits for the fence to be released. - void Wait(); - - /** - * Waits for a previous fence and watches a new one. - * @param new_fence New fence to wait to. - */ - void Watch(VKFence& new_fence); - - /** - * Checks if it's currently being watched and starts watching it if it's available. - * @returns True if a watch has started, false if it's being watched. - */ - bool TryWatch(VKFence& new_fence); - - void OnFenceRemoval(VKFence* signaling_fence) override; - - /** - * Do not use it paired with Watch. Use TryWatch instead. - * Returns true when the watch is free. - */ - bool IsUsed() const { - return fence != nullptr; - } - -private: - VKFence* fence{}; ///< Fence watching this resource. nullptr when the watch is free. -}; - -/** - * Handles a pool of resources protected by fences. Manages resource overflow allocating more - * resources. - */ -class VKFencedPool { -public: - explicit VKFencedPool(std::size_t grow_step); - virtual ~VKFencedPool(); - -protected: - /** - * Commits a free resource and protects it with a fence. It may allocate new resources. - * @param fence Fence that protects the commited resource. - * @returns Index of the resource commited. - */ - std::size_t CommitResource(VKFence& fence); - - /// Called when a chunk of resources have to be allocated. - virtual void Allocate(std::size_t begin, std::size_t end) = 0; - -private: - /// Manages pool overflow allocating new resources. - std::size_t ManageOverflow(); - - /// Allocates a new page of resources. - void Grow(); - - std::size_t grow_step = 0; ///< Number of new resources created after an overflow - std::size_t free_iterator = 0; ///< Hint to where the next free resources is likely to be found - std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Set of watched resources -}; - -/** - * The resource manager handles all resources that can be protected with a fence avoiding - * driver-side or GPU-side concurrent usage. Usage is documented in VKFence. - */ -class VKResourceManager final { -public: - explicit VKResourceManager(const VKDevice& device); - ~VKResourceManager(); - - /// Commits a fence. It has to be sent to a queue and released. - VKFence& CommitFence(); - - /// Commits an unused command buffer and protects it with a fence. - VkCommandBuffer CommitCommandBuffer(VKFence& fence); - -private: - /// Allocates new fences. - void GrowFences(std::size_t new_fences_count); - - const VKDevice& device; ///< Device handler. - std::size_t fences_iterator = 0; ///< Index where a free fence is likely to be found. - std::vector<std::unique_ptr<VKFence>> fences; ///< Pool of fences. - std::unique_ptr<CommandBufferPool> command_buffer_pool; ///< Pool of command buffers. -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp new file mode 100644 index 000000000..ee274ac59 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp @@ -0,0 +1,63 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <optional> + +#include "video_core/renderer_vulkan/vk_master_semaphore.h" +#include "video_core/renderer_vulkan/vk_resource_pool.h" + +namespace Vulkan { + +ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_) + : master_semaphore{master_semaphore_}, grow_step{grow_step_} {} + +ResourcePool::~ResourcePool() = default; + +size_t ResourcePool::CommitResource() { + // Refresh semaphore to query updated results + master_semaphore.Refresh(); + + const auto search = [this](size_t begin, size_t end) -> std::optional<size_t> { + for (size_t iterator = begin; iterator < end; ++iterator) { + if (master_semaphore.IsFree(ticks[iterator])) { + ticks[iterator] = master_semaphore.CurrentTick(); + return iterator; + } + } + return {}; + }; + // Try to find a free resource from the hinted position to the end. + auto found = search(free_iterator, ticks.size()); + if (!found) { + // Search from beginning to the hinted position. + found = search(0, free_iterator); + if (!found) { + // Both searches failed, the pool is full; handle it. + const size_t free_resource = ManageOverflow(); + + ticks[free_resource] = master_semaphore.CurrentTick(); + found = free_resource; + } + } + // Free iterator is hinted to the resource after the one that's been commited. + free_iterator = (*found + 1) % ticks.size(); + return *found; +} + +size_t ResourcePool::ManageOverflow() { + const size_t old_capacity = ticks.size(); + Grow(); + + // The last entry is guaranted to be free, since it's the first element of the freshly + // allocated resources. + return old_capacity; +} + +void ResourcePool::Grow() { + const size_t old_capacity = ticks.size(); + ticks.resize(old_capacity + grow_step); + Allocate(old_capacity, old_capacity + grow_step); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h new file mode 100644 index 000000000..a018c7ec2 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_resource_pool.h @@ -0,0 +1,43 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <vector> + +#include "common/common_types.h" + +namespace Vulkan { + +class MasterSemaphore; + +/** + * Handles a pool of resources protected by fences. Manages resource overflow allocating more + * resources. + */ +class ResourcePool { +public: + explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step); + virtual ~ResourcePool(); + +protected: + size_t CommitResource(); + + /// Called when a chunk of resources have to be allocated. + virtual void Allocate(size_t begin, size_t end) = 0; + +private: + /// Manages pool overflow allocating new resources. + size_t ManageOverflow(); + + /// Allocates a new page of resources. + void Grow(); + + MasterSemaphore& master_semaphore; + size_t grow_step = 0; ///< Number of new resources created after an overflow + size_t free_iterator = 0; ///< Hint to where the next free resources is likely to be found + std::vector<u64> ticks; ///< Ticks for each resource +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index dbbd0961a..1a483dc71 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -10,9 +10,10 @@ #include "common/microprofile.h" #include "common/thread.h" +#include "video_core/renderer_vulkan/vk_command_pool.h" #include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_master_semaphore.h" #include "video_core/renderer_vulkan/vk_query_cache.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -35,10 +36,10 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) { last = nullptr; } -VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager, - StateTracker& state_tracker) - : device{device}, resource_manager{resource_manager}, state_tracker{state_tracker}, - next_fence{&resource_manager.CommitFence()} { +VKScheduler::VKScheduler(const VKDevice& device_, StateTracker& state_tracker_) + : device{device_}, state_tracker{state_tracker_}, + master_semaphore{std::make_unique<MasterSemaphore>(device)}, + command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} { AcquireNewChunk(); AllocateNewContext(); worker_thread = std::thread(&VKScheduler::WorkerThread, this); @@ -50,20 +51,27 @@ VKScheduler::~VKScheduler() { worker_thread.join(); } -void VKScheduler::Flush(bool release_fence, VkSemaphore semaphore) { +u64 VKScheduler::CurrentTick() const noexcept { + return master_semaphore->CurrentTick(); +} + +bool VKScheduler::IsFree(u64 tick) const noexcept { + return master_semaphore->IsFree(tick); +} + +void VKScheduler::Wait(u64 tick) { + master_semaphore->Wait(tick); +} + +void VKScheduler::Flush(VkSemaphore semaphore) { SubmitExecution(semaphore); - if (release_fence) { - current_fence->Release(); - } AllocateNewContext(); } -void VKScheduler::Finish(bool release_fence, VkSemaphore semaphore) { +void VKScheduler::Finish(VkSemaphore semaphore) { + const u64 presubmit_tick = CurrentTick(); SubmitExecution(semaphore); - current_fence->Wait(); - if (release_fence) { - current_fence->Release(); - } + Wait(presubmit_tick); AllocateNewContext(); } @@ -160,18 +168,38 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) { current_cmdbuf.End(); + const VkSemaphore timeline_semaphore = master_semaphore->Handle(); + const u32 num_signal_semaphores = semaphore ? 2U : 1U; + + const u64 signal_value = master_semaphore->CurrentTick(); + const u64 wait_value = signal_value - 1; + const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + + master_semaphore->NextTick(); + + const std::array signal_values{signal_value, u64(0)}; + const std::array signal_semaphores{timeline_semaphore, semaphore}; + + const VkTimelineSemaphoreSubmitInfoKHR timeline_si{ + .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR, + .pNext = nullptr, + .waitSemaphoreValueCount = 1, + .pWaitSemaphoreValues = &wait_value, + .signalSemaphoreValueCount = num_signal_semaphores, + .pSignalSemaphoreValues = signal_values.data(), + }; const VkSubmitInfo submit_info{ .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = nullptr, - .waitSemaphoreCount = 0, - .pWaitSemaphores = nullptr, - .pWaitDstStageMask = nullptr, + .pNext = &timeline_si, + .waitSemaphoreCount = 1, + .pWaitSemaphores = &timeline_semaphore, + .pWaitDstStageMask = &wait_stage_mask, .commandBufferCount = 1, .pCommandBuffers = current_cmdbuf.address(), - .signalSemaphoreCount = semaphore ? 1U : 0U, - .pSignalSemaphores = &semaphore, + .signalSemaphoreCount = num_signal_semaphores, + .pSignalSemaphores = signal_semaphores.data(), }; - switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info, *current_fence)) { + switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) { case VK_SUCCESS: break; case VK_ERROR_DEVICE_LOST: @@ -183,14 +211,9 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) { } void VKScheduler::AllocateNewContext() { - ++ticks; - std::unique_lock lock{mutex}; - current_fence = next_fence; - next_fence = &resource_manager.CommitFence(); - current_cmdbuf = vk::CommandBuffer(resource_manager.CommitCommandBuffer(*current_fence), - device.GetDispatchLoader()); + current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader()); current_cmdbuf.Begin({ .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 970a65566..7be8a19f0 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -16,42 +16,33 @@ namespace Vulkan { +class CommandPool; +class MasterSemaphore; class StateTracker; class VKDevice; -class VKFence; class VKQueryCache; -class VKResourceManager; - -class VKFenceView { -public: - VKFenceView() = default; - VKFenceView(VKFence* const& fence) : fence{fence} {} - - VKFence* operator->() const noexcept { - return fence; - } - - operator VKFence&() const noexcept { - return *fence; - } - -private: - VKFence* const& fence; -}; /// The scheduler abstracts command buffer and fence management with an interface that's able to do /// OpenGL-like operations on Vulkan command buffers. class VKScheduler { public: - explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager, - StateTracker& state_tracker); + explicit VKScheduler(const VKDevice& device, StateTracker& state_tracker); ~VKScheduler(); + /// Returns the current command buffer tick. + [[nodiscard]] u64 CurrentTick() const noexcept; + + /// Returns true when a tick has been triggered by the GPU. + [[nodiscard]] bool IsFree(u64 tick) const noexcept; + + /// Waits for the given tick to trigger on the GPU. + void Wait(u64 tick); + /// Sends the current execution context to the GPU. - void Flush(bool release_fence = true, VkSemaphore semaphore = nullptr); + void Flush(VkSemaphore semaphore = nullptr); /// Sends the current execution context to the GPU and waits for it to complete. - void Finish(bool release_fence = true, VkSemaphore semaphore = nullptr); + void Finish(VkSemaphore semaphore = nullptr); /// Waits for the worker thread to finish executing everything. After this function returns it's /// safe to touch worker resources. @@ -86,14 +77,9 @@ public: (void)chunk->Record(command); } - /// Gets a reference to the current fence. - VKFenceView GetFence() const { - return current_fence; - } - - /// Returns the current command buffer tick. - u64 Ticks() const { - return ticks; + /// Returns the master timeline semaphore. + [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept { + return *master_semaphore; } private: @@ -171,6 +157,13 @@ private: std::array<u8, 0x8000> data{}; }; + struct State { + VkRenderPass renderpass = nullptr; + VkFramebuffer framebuffer = nullptr; + VkExtent2D render_area = {0, 0}; + VkPipeline graphics_pipeline = nullptr; + }; + void WorkerThread(); void SubmitExecution(VkSemaphore semaphore); @@ -186,30 +179,23 @@ private: void AcquireNewChunk(); const VKDevice& device; - VKResourceManager& resource_manager; StateTracker& state_tracker; + std::unique_ptr<MasterSemaphore> master_semaphore; + std::unique_ptr<CommandPool> command_pool; + VKQueryCache* query_cache = nullptr; vk::CommandBuffer current_cmdbuf; - VKFence* current_fence = nullptr; - VKFence* next_fence = nullptr; - - struct State { - VkRenderPass renderpass = nullptr; - VkFramebuffer framebuffer = nullptr; - VkExtent2D render_area = {0, 0}; - VkPipeline graphics_pipeline = nullptr; - } state; std::unique_ptr<CommandChunk> chunk; std::thread worker_thread; + State state; Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue; Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve; std::mutex mutex; std::condition_variable cv; - std::atomic<u64> ticks = 0; bool quit = false; }; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index 5eca0ab91..2fd3b7f39 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -10,36 +10,18 @@ #include "common/bit_util.h" #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { -VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence, - u64 last_epoch) - : buffer{std::move(buffer)}, watch{fence}, last_epoch{last_epoch} {} +VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer_) + : buffer{std::move(buffer_)} {} -VKStagingBufferPool::StagingBuffer::StagingBuffer(StagingBuffer&& rhs) noexcept { - buffer = std::move(rhs.buffer); - watch = std::move(rhs.watch); - last_epoch = rhs.last_epoch; -} - -VKStagingBufferPool::StagingBuffer::~StagingBuffer() = default; - -VKStagingBufferPool::StagingBuffer& VKStagingBufferPool::StagingBuffer::operator=( - StagingBuffer&& rhs) noexcept { - buffer = std::move(rhs.buffer); - watch = std::move(rhs.watch); - last_epoch = rhs.last_epoch; - return *this; -} - -VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler) - : device{device}, memory_manager{memory_manager}, scheduler{scheduler} {} +VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device_, VKMemoryManager& memory_manager_, + VKScheduler& scheduler_) + : device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_} {} VKStagingBufferPool::~VKStagingBufferPool() = default; @@ -51,7 +33,6 @@ VKBuffer& VKStagingBufferPool::GetUnusedBuffer(std::size_t size, bool host_visib } void VKStagingBufferPool::TickFrame() { - ++epoch; current_delete_level = (current_delete_level + 1) % NumLevels; ReleaseCache(true); @@ -59,11 +40,12 @@ void VKStagingBufferPool::TickFrame() { } VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_visible) { - for (auto& entry : GetCache(host_visible)[Common::Log2Ceil64(size)].entries) { - if (entry.watch.TryWatch(scheduler.GetFence())) { - entry.last_epoch = epoch; - return &*entry.buffer; + for (StagingBuffer& entry : GetCache(host_visible)[Common::Log2Ceil64(size)].entries) { + if (!scheduler.IsFree(entry.tick)) { + continue; } + entry.tick = scheduler.CurrentTick(); + return &*entry.buffer; } return nullptr; } @@ -86,8 +68,10 @@ VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_v }); buffer->commit = memory_manager.Commit(buffer->handle, host_visible); - auto& entries = GetCache(host_visible)[log2].entries; - return *entries.emplace_back(std::move(buffer), scheduler.GetFence(), epoch).buffer; + std::vector<StagingBuffer>& entries = GetCache(host_visible)[log2].entries; + StagingBuffer& entry = entries.emplace_back(std::move(buffer)); + entry.tick = scheduler.CurrentTick(); + return *entry.buffer; } VKStagingBufferPool::StagingBuffersCache& VKStagingBufferPool::GetCache(bool host_visible) { @@ -109,9 +93,8 @@ u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t lo auto& entries = staging.entries; const std::size_t old_size = entries.size(); - const auto is_deleteable = [this](const auto& entry) { - static constexpr u64 epochs_to_destroy = 180; - return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed(); + const auto is_deleteable = [this](const StagingBuffer& entry) { + return scheduler.IsFree(entry.tick); }; const std::size_t begin_offset = staging.delete_index; const std::size_t end_offset = std::min(begin_offset + deletions_per_tick, old_size); diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 3c4901437..2dd5049ac 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -10,13 +10,11 @@ #include "common/common_types.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/wrapper.h" namespace Vulkan { class VKDevice; -class VKFenceWatch; class VKScheduler; struct VKBuffer final { @@ -36,16 +34,10 @@ public: private: struct StagingBuffer final { - explicit StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence, u64 last_epoch); - StagingBuffer(StagingBuffer&& rhs) noexcept; - StagingBuffer(const StagingBuffer&) = delete; - ~StagingBuffer(); - - StagingBuffer& operator=(StagingBuffer&& rhs) noexcept; + explicit StagingBuffer(std::unique_ptr<VKBuffer> buffer); std::unique_ptr<VKBuffer> buffer; - VKFenceWatch watch; - u64 last_epoch = 0; + u64 tick = 0; }; struct StagingBuffers final { @@ -73,8 +65,6 @@ private: StagingBuffersCache host_staging_buffers; StagingBuffersCache device_staging_buffers; - u64 epoch = 0; - std::size_t current_delete_level = 0; }; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 9151d9fb1..5d2c4a796 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -42,7 +42,6 @@ Flags MakeInvalidationFlags() { flags[DepthWriteEnable] = true; flags[DepthCompareOp] = true; flags[FrontFace] = true; - flags[PrimitiveTopology] = true; flags[StencilOp] = true; flags[StencilTestEnable] = true; return flags; @@ -112,10 +111,6 @@ void SetupDirtyFrontFace(Tables& tables) { table[OFF(screen_y_control)] = FrontFace; } -void SetupDirtyPrimitiveTopology(Tables& tables) { - tables[0][OFF(draw.topology)] = PrimitiveTopology; -} - void SetupDirtyStencilOp(Tables& tables) { auto& table = tables[0]; table[OFF(stencil_front_op_fail)] = StencilOp; @@ -137,12 +132,9 @@ void SetupDirtyStencilTestEnable(Tables& tables) { } // Anonymous namespace -StateTracker::StateTracker(Core::System& system) - : system{system}, invalidation_flags{MakeInvalidationFlags()} {} - -void StateTracker::Initialize() { - auto& dirty = system.GPU().Maxwell3D().dirty; - auto& tables = dirty.tables; +StateTracker::StateTracker(Tegra::GPU& gpu) + : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} { + auto& tables = gpu.Maxwell3D().dirty.tables; SetupDirtyRenderTargets(tables); SetupDirtyViewports(tables); SetupDirtyScissors(tables); @@ -156,13 +148,8 @@ void StateTracker::Initialize() { SetupDirtyDepthWriteEnable(tables); SetupDirtyDepthCompareOp(tables); SetupDirtyFrontFace(tables); - SetupDirtyPrimitiveTopology(tables); SetupDirtyStencilOp(tables); SetupDirtyStencilTestEnable(tables); } -void StateTracker::InvalidateCommandBufferState() { - system.GPU().Maxwell3D().dirty.flags |= invalidation_flags; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 54ca0d6c6..1de789e57 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -32,7 +32,6 @@ enum : u8 { DepthWriteEnable, DepthCompareOp, FrontFace, - PrimitiveTopology, StencilOp, StencilTestEnable, @@ -43,12 +42,15 @@ static_assert(Last <= std::numeric_limits<u8>::max()); } // namespace Dirty class StateTracker { -public: - explicit StateTracker(Core::System& system); + using Maxwell = Tegra::Engines::Maxwell3D::Regs; - void Initialize(); +public: + explicit StateTracker(Tegra::GPU& gpu); - void InvalidateCommandBufferState(); + void InvalidateCommandBufferState() { + flags |= invalidation_flags; + current_topology = INVALID_TOPOLOGY; + } bool TouchViewports() { return Exchange(Dirty::Viewports, false); @@ -102,10 +104,6 @@ public: return Exchange(Dirty::FrontFace, false); } - bool TouchPrimitiveTopology() { - return Exchange(Dirty::PrimitiveTopology, false); - } - bool TouchStencilOp() { return Exchange(Dirty::StencilOp, false); } @@ -114,16 +112,24 @@ public: return Exchange(Dirty::StencilTestEnable, false); } + bool ChangePrimitiveTopology(Maxwell::PrimitiveTopology new_topology) { + const bool has_changed = current_topology != new_topology; + current_topology = new_topology; + return has_changed; + } + private: + static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u); + bool Exchange(std::size_t id, bool new_value) const noexcept { - auto& flags = system.GPU().Maxwell3D().dirty.flags; const bool is_dirty = flags[id]; flags[id] = new_value; return is_dirty; } - Core::System& system; + Tegra::Engines::Maxwell3D::DirtyState::Flags& flags; Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags; + Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index a5526a3f5..1b59612b9 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -11,7 +11,6 @@ #include "common/alignment.h" #include "common/assert.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -57,9 +56,9 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties, } // Anonymous namespace -VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, +VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_, VkBufferUsageFlags usage) - : device{device}, scheduler{scheduler} { + : device{device_}, scheduler{scheduler_} { CreateBuffers(usage); ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); @@ -111,7 +110,7 @@ void VKStreamBuffer::Unmap(u64 size) { } auto& watch = current_watches[current_watch_cursor++]; watch.upper_bound = offset; - watch.fence.Watch(scheduler.GetFence()); + watch.tick = scheduler.CurrentTick(); } void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { @@ -121,7 +120,8 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { // Substract from the preferred heap size some bytes to avoid getting out of memory. const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size; - const VkDeviceSize allocable_size = heap_size - 9 * 1024 * 1024; + // As per DXVK's example, using `heap_size / 2` + const VkDeviceSize allocable_size = heap_size / 2; buffer = device.GetLogical().CreateBuffer({ .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, @@ -157,7 +157,7 @@ void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) { while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) { auto& watch = previous_watches[wait_cursor]; wait_bound = watch.upper_bound; - watch.fence.Wait(); + scheduler.Wait(watch.tick); ++wait_cursor; } } diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 689f0d276..5e15ad78f 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -14,7 +14,6 @@ namespace Vulkan { class VKDevice; -class VKFence; class VKFenceWatch; class VKScheduler; @@ -44,8 +43,8 @@ public: } private: - struct Watch final { - VKFenceWatch fence; + struct Watch { + u64 tick{}; u64 upper_bound{}; }; diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index 6bfd2abae..9636a7c65 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -12,7 +12,7 @@ #include "core/core.h" #include "core/frontend/framebuffer_layout.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_swapchain.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -56,8 +56,8 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi } // Anonymous namespace -VKSwapchain::VKSwapchain(VkSurfaceKHR surface, const VKDevice& device) - : surface{surface}, device{device} {} +VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const VKDevice& device_, VKScheduler& scheduler_) + : surface{surface_}, device{device_}, scheduler{scheduler_} {} VKSwapchain::~VKSwapchain() = default; @@ -75,21 +75,18 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) { CreateSemaphores(); CreateImageViews(); - fences.resize(image_count, nullptr); + resource_ticks.clear(); + resource_ticks.resize(image_count); } void VKSwapchain::AcquireNextImage() { device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(), *present_semaphores[frame_index], {}, &image_index); - if (auto& fence = fences[image_index]; fence) { - fence->Wait(); - fence->Release(); - fence = nullptr; - } + scheduler.Wait(resource_ticks[image_index]); } -bool VKSwapchain::Present(VkSemaphore render_semaphore, VKFence& fence) { +bool VKSwapchain::Present(VkSemaphore render_semaphore) { const VkSemaphore present_semaphore{*present_semaphores[frame_index]}; const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore}; const auto present_queue{device.GetPresentQueue()}; @@ -123,8 +120,7 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore, VKFence& fence) { break; } - ASSERT(fences[image_index] == nullptr); - fences[image_index] = &fence; + resource_ticks[image_index] = scheduler.CurrentTick(); frame_index = (frame_index + 1) % static_cast<u32>(image_count); return recreated; } diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index a35d61345..6b39befdf 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -16,11 +16,11 @@ struct FramebufferLayout; namespace Vulkan { class VKDevice; -class VKFence; +class VKScheduler; class VKSwapchain { public: - explicit VKSwapchain(VkSurfaceKHR surface, const VKDevice& device); + explicit VKSwapchain(VkSurfaceKHR surface, const VKDevice& device, VKScheduler& scheduler); ~VKSwapchain(); /// Creates (or recreates) the swapchain with a given size. @@ -31,7 +31,7 @@ public: /// Presents the rendered image to the swapchain. Returns true when the swapchains had to be /// recreated. Takes responsability for the ownership of fence. - bool Present(VkSemaphore render_semaphore, VKFence& fence); + bool Present(VkSemaphore render_semaphore); /// Returns true when the framebuffer layout has changed. bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const; @@ -74,6 +74,7 @@ private: const VkSurfaceKHR surface; const VKDevice& device; + VKScheduler& scheduler; vk::SwapchainKHR swapchain; @@ -81,7 +82,7 @@ private: std::vector<VkImage> images; std::vector<vk::ImageView> image_views; std::vector<vk::Framebuffer> framebuffers; - std::vector<VKFence*> fences; + std::vector<u64> resource_ticks; std::vector<vk::Semaphore> present_semaphores; u32 image_index{}; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 2c6f54101..f2c8f2ae1 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -188,12 +188,10 @@ u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::Swizzl } // Anonymous namespace -CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, - VKResourceManager& resource_manager, VKMemoryManager& memory_manager, +CachedSurface::CachedSurface(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, VKStagingBufferPool& staging_pool, GPUVAddr gpu_addr, const SurfaceParams& params) - : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, system{system}, - device{device}, resource_manager{resource_manager}, + : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} { if (params.IsBuffer()) { buffer = CreateBuffer(device, params, host_memory_size); @@ -490,19 +488,20 @@ VkImageView CachedSurfaceView::GetAttachment() { return *render_target; } -VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - const VKDevice& device, VKResourceManager& resource_manager, - VKMemoryManager& memory_manager, VKScheduler& scheduler, - VKStagingBufferPool& staging_pool) - : TextureCache(system, rasterizer, device.IsOptimalAstcSupported()), device{device}, - resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler}, - staging_pool{staging_pool} {} +VKTextureCache::VKTextureCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::Engines::Maxwell3D& maxwell3d, + Tegra::MemoryManager& gpu_memory, const VKDevice& device_, + VKMemoryManager& memory_manager_, VKScheduler& scheduler_, + VKStagingBufferPool& staging_pool_) + : TextureCache(rasterizer, maxwell3d, gpu_memory, device_.IsOptimalAstcSupported()), + device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{ + staging_pool_} {} VKTextureCache::~VKTextureCache() = default; Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { - return std::make_shared<CachedSurface>(system, device, resource_manager, memory_manager, - scheduler, staging_pool, gpu_addr, params); + return std::make_shared<CachedSurface>(device, memory_manager, scheduler, staging_pool, + gpu_addr, params); } void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 807e26c8a..39202feba 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -15,10 +15,6 @@ #include "video_core/texture_cache/surface_base.h" #include "video_core/texture_cache/texture_cache.h" -namespace Core { -class System; -} - namespace VideoCore { class RasterizerInterface; } @@ -27,7 +23,6 @@ namespace Vulkan { class RasterizerVulkan; class VKDevice; -class VKResourceManager; class VKScheduler; class VKStagingBufferPool; @@ -45,8 +40,7 @@ class CachedSurface final : public VideoCommon::SurfaceBase<View> { friend CachedSurfaceView; public: - explicit CachedSurface(Core::System& system, const VKDevice& device, - VKResourceManager& resource_manager, VKMemoryManager& memory_manager, + explicit CachedSurface(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler, VKStagingBufferPool& staging_pool, GPUVAddr gpu_addr, const SurfaceParams& params); ~CachedSurface(); @@ -101,9 +95,7 @@ private: VkImageSubresourceRange GetImageSubresourceRange() const; - Core::System& system; const VKDevice& device; - VKResourceManager& resource_manager; VKMemoryManager& memory_manager; VKScheduler& scheduler; VKStagingBufferPool& staging_pool; @@ -201,10 +193,10 @@ private: class VKTextureCache final : public TextureCacheBase { public: - explicit VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - const VKDevice& device, VKResourceManager& resource_manager, - VKMemoryManager& memory_manager, VKScheduler& scheduler, - VKStagingBufferPool& staging_pool); + explicit VKTextureCache(VideoCore::RasterizerInterface& rasterizer, + Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, + const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler, VKStagingBufferPool& staging_pool); ~VKTextureCache(); private: @@ -219,7 +211,6 @@ private: void BufferCopy(Surface& src_surface, Surface& dst_surface) override; const VKDevice& device; - VKResourceManager& resource_manager; VKMemoryManager& memory_manager; VKScheduler& scheduler; VKStagingBufferPool& staging_pool; diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 56055af1b..c034558a3 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -6,6 +6,7 @@ #include <exception> #include <memory> #include <optional> +#include <string_view> #include <utility> #include <vector> @@ -18,21 +19,42 @@ namespace Vulkan::vk { namespace { +template <typename Func> +void SortPhysicalDevices(std::vector<VkPhysicalDevice>& devices, const InstanceDispatch& dld, + Func&& func) { + // Calling GetProperties calls Vulkan more than needed. But they are supposed to be cheap + // functions. + std::stable_sort(devices.begin(), devices.end(), + [&dld, &func](VkPhysicalDevice lhs, VkPhysicalDevice rhs) { + return func(vk::PhysicalDevice(lhs, dld).GetProperties(), + vk::PhysicalDevice(rhs, dld).GetProperties()); + }); +} + +void SortPhysicalDevicesPerVendor(std::vector<VkPhysicalDevice>& devices, + const InstanceDispatch& dld, + std::initializer_list<u32> vendor_ids) { + for (auto it = vendor_ids.end(); it != vendor_ids.begin();) { + --it; + SortPhysicalDevices(devices, dld, [id = *it](const auto& lhs, const auto& rhs) { + return lhs.vendorID == id && rhs.vendorID != id; + }); + } +} + void SortPhysicalDevices(std::vector<VkPhysicalDevice>& devices, const InstanceDispatch& dld) { - std::stable_sort(devices.begin(), devices.end(), [&](auto lhs, auto rhs) { - // This will call Vulkan more than needed, but these calls are cheap. - const auto lhs_properties = vk::PhysicalDevice(lhs, dld).GetProperties(); - const auto rhs_properties = vk::PhysicalDevice(rhs, dld).GetProperties(); - - // Prefer discrete GPUs, Nvidia over AMD, AMD over Intel, Intel over the rest. - const bool preferred = - (lhs_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU && - rhs_properties.deviceType != VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) || - (lhs_properties.vendorID == 0x10DE && rhs_properties.vendorID != 0x10DE) || - (lhs_properties.vendorID == 0x1002 && rhs_properties.vendorID != 0x1002) || - (lhs_properties.vendorID == 0x8086 && rhs_properties.vendorID != 0x8086); - return !preferred; + // Sort by name, this will set a base and make GPUs with higher numbers appear first + // (e.g. GTX 1650 will intentionally be listed before a GTX 1080). + SortPhysicalDevices(devices, dld, [](const auto& lhs, const auto& rhs) { + return std::string_view{lhs.deviceName} > std::string_view{rhs.deviceName}; }); + // Prefer discrete over non-discrete + SortPhysicalDevices(devices, dld, [](const auto& lhs, const auto& rhs) { + return lhs.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU && + rhs.deviceType != VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU; + }); + // Prefer Nvidia over AMD, AMD over Intel, Intel over the rest. + SortPhysicalDevicesPerVendor(devices, dld, {0x10DE, 0x1002, 0x8086}); } template <typename T> @@ -149,6 +171,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkGetFenceStatus); X(vkGetImageMemoryRequirements); X(vkGetQueryPoolResults); + X(vkGetSemaphoreCounterValueKHR); X(vkMapMemory); X(vkQueueSubmit); X(vkResetFences); @@ -157,6 +180,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkUpdateDescriptorSetWithTemplateKHR); X(vkUpdateDescriptorSets); X(vkWaitForFences); + X(vkWaitSemaphoresKHR); #undef X } @@ -263,6 +287,22 @@ const char* ToString(VkResult result) noexcept { return "VK_ERROR_INVALID_DEVICE_ADDRESS_EXT"; case VkResult::VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT: return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT"; + case VkResult::VK_ERROR_UNKNOWN: + return "VK_ERROR_UNKNOWN"; + case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR: + return "VK_ERROR_INCOMPATIBLE_VERSION_KHR"; + case VkResult::VK_THREAD_IDLE_KHR: + return "VK_THREAD_IDLE_KHR"; + case VkResult::VK_THREAD_DONE_KHR: + return "VK_THREAD_DONE_KHR"; + case VkResult::VK_OPERATION_DEFERRED_KHR: + return "VK_OPERATION_DEFERRED_KHR"; + case VkResult::VK_OPERATION_NOT_DEFERRED_KHR: + return "VK_OPERATION_NOT_DEFERRED_KHR"; + case VkResult::VK_PIPELINE_COMPILE_REQUIRED_EXT: + return "VK_PIPELINE_COMPILE_REQUIRED_EXT"; + case VkResult::VK_RESULT_MAX_ENUM: + return "VK_RESULT_MAX_ENUM"; } return "Unknown"; } @@ -558,7 +598,10 @@ Semaphore Device::CreateSemaphore() const { .pNext = nullptr, .flags = 0, }; + return CreateSemaphore(ci); +} +Semaphore Device::CreateSemaphore(const VkSemaphoreCreateInfo& ci) const { VkSemaphore object; Check(dld->vkCreateSemaphore(handle, &ci, nullptr, &object)); return Semaphore(object, handle, *dld); @@ -644,7 +687,7 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons return ShaderModule(object, handle, *dld); } -Event Device::CreateNewEvent() const { +Event Device::CreateEvent() const { static constexpr VkEventCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index 748a94d2f..f64919623 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -267,6 +267,7 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkGetFenceStatus vkGetFenceStatus; PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements; PFN_vkGetQueryPoolResults vkGetQueryPoolResults; + PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR; PFN_vkMapMemory vkMapMemory; PFN_vkQueueSubmit vkQueueSubmit; PFN_vkResetFences vkResetFences; @@ -275,6 +276,7 @@ struct DeviceDispatch : public InstanceDispatch { PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR; PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets; PFN_vkWaitForFences vkWaitForFences; + PFN_vkWaitSemaphoresKHR vkWaitSemaphoresKHR; }; /// Loads instance agnostic function pointers. @@ -550,7 +552,6 @@ using PipelineLayout = Handle<VkPipelineLayout, VkDevice, DeviceDispatch>; using QueryPool = Handle<VkQueryPool, VkDevice, DeviceDispatch>; using RenderPass = Handle<VkRenderPass, VkDevice, DeviceDispatch>; using Sampler = Handle<VkSampler, VkDevice, DeviceDispatch>; -using Semaphore = Handle<VkSemaphore, VkDevice, DeviceDispatch>; using ShaderModule = Handle<VkShaderModule, VkDevice, DeviceDispatch>; using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>; @@ -582,7 +583,8 @@ public: /// Construct a queue handle. constexpr Queue(VkQueue queue, const DeviceDispatch& dld) noexcept : queue{queue}, dld{&dld} {} - VkResult Submit(Span<VkSubmitInfo> submit_infos, VkFence fence) const noexcept { + VkResult Submit(Span<VkSubmitInfo> submit_infos, + VkFence fence = VK_NULL_HANDLE) const noexcept { return dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence); } @@ -674,6 +676,44 @@ public: } }; +class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> { + using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle; + +public: + [[nodiscard]] u64 GetCounter() const { + u64 value; + Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value)); + return value; + } + + /** + * Waits for a timeline semaphore on the host. + * + * @param value Value to wait + * @param timeout Time in nanoseconds to timeout + * @return True on successful wait, false on timeout + */ + bool Wait(u64 value, u64 timeout = std::numeric_limits<u64>::max()) const { + const VkSemaphoreWaitInfoKHR wait_info{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR, + .pNext = nullptr, + .flags = 0, + .semaphoreCount = 1, + .pSemaphores = &handle, + .pValues = &value, + }; + const VkResult result = dld->vkWaitSemaphoresKHR(owner, &wait_info, timeout); + switch (result) { + case VK_SUCCESS: + return true; + case VK_TIMEOUT: + return false; + default: + throw Exception(result); + } + } +}; + class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> { using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle; @@ -694,6 +734,8 @@ public: Semaphore CreateSemaphore() const; + Semaphore CreateSemaphore(const VkSemaphoreCreateInfo& ci) const; + Fence CreateFence(const VkFenceCreateInfo& ci) const; DescriptorPool CreateDescriptorPool(const VkDescriptorPoolCreateInfo& ci) const; @@ -721,7 +763,7 @@ public: ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const; - Event CreateNewEvent() const; + Event CreateEvent() const; SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const; diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h index cca13bcde..8e5a22ab3 100644 --- a/src/video_core/shader/ast.h +++ b/src/video_core/shader/ast.h @@ -199,55 +199,48 @@ public: } std::optional<u32> GetGotoLabel() const { - auto inner = std::get_if<ASTGoto>(&data); - if (inner) { + if (const auto* inner = std::get_if<ASTGoto>(&data)) { return {inner->label}; } - return {}; + return std::nullopt; } Expr GetGotoCondition() const { - auto inner = std::get_if<ASTGoto>(&data); - if (inner) { + if (const auto* inner = std::get_if<ASTGoto>(&data)) { return inner->condition; } return nullptr; } void MarkLabelUnused() { - auto inner = std::get_if<ASTLabel>(&data); - if (inner) { + if (auto* inner = std::get_if<ASTLabel>(&data)) { inner->unused = true; } } bool IsLabelUnused() const { - auto inner = std::get_if<ASTLabel>(&data); - if (inner) { + if (const auto* inner = std::get_if<ASTLabel>(&data)) { return inner->unused; } return true; } std::optional<u32> GetLabelIndex() const { - auto inner = std::get_if<ASTLabel>(&data); - if (inner) { + if (const auto* inner = std::get_if<ASTLabel>(&data)) { return {inner->index}; } - return {}; + return std::nullopt; } Expr GetIfCondition() const { - auto inner = std::get_if<ASTIfThen>(&data); - if (inner) { + if (const auto* inner = std::get_if<ASTIfThen>(&data)) { return inner->condition; } return nullptr; } void SetGotoCondition(Expr new_condition) { - auto inner = std::get_if<ASTGoto>(&data); - if (inner) { + if (auto* inner = std::get_if<ASTGoto>(&data)) { inner->condition = std::move(new_condition); } } diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index f815584f7..aabd62c5c 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -73,11 +73,11 @@ void AsyncShaders::KillWorkers() { worker_threads.clear(); } -bool AsyncShaders::HasWorkQueued() { +bool AsyncShaders::HasWorkQueued() const { return !pending_queue.empty(); } -bool AsyncShaders::HasCompletedWork() { +bool AsyncShaders::HasCompletedWork() const { std::shared_lock lock{completed_mutex}; return !finished_work.empty(); } @@ -102,7 +102,7 @@ bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const { } std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() { - std::vector<AsyncShaders::Result> results; + std::vector<Result> results; { std::unique_lock lock{completed_mutex}; results.assign(std::make_move_iterator(finished_work.begin()), diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h index d5ae814d5..7a99e1dc5 100644 --- a/src/video_core/shader/async_shaders.h +++ b/src/video_core/shader/async_shaders.h @@ -5,11 +5,21 @@ #pragma once #include <condition_variable> -#include <deque> #include <memory> #include <shared_mutex> #include <thread> -#include "common/bit_field.h" + +// This header includes both Vulkan and OpenGL headers, this has to be fixed +// Unfortunately, including OpenGL will include Windows.h that defines macros that can cause issues. +// Forcefully include glad early and undefine macros +#include <glad/glad.h> +#ifdef CreateEvent +#undef CreateEvent +#endif +#ifdef CreateSemaphore +#undef CreateSemaphore +#endif + #include "common/common_types.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" @@ -17,7 +27,6 @@ #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_update_descriptor.h" namespace Core::Frontend { class EmuWindow; @@ -70,20 +79,20 @@ public: void KillWorkers(); /// Check to see if any shaders have actually been compiled - bool HasCompletedWork(); + [[nodiscard]] bool HasCompletedWork() const; /// Deduce if a shader can be build on another thread of MUST be built in sync. We cannot build /// every shader async as some shaders are only built and executed once. We try to "guess" which /// shader would be used only once - bool IsShaderAsync(const Tegra::GPU& gpu) const; + [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const; /// Pulls completed compiled shaders - std::vector<Result> GetCompletedWork(); + [[nodiscard]] std::vector<Result> GetCompletedWork(); void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type, u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset, - VideoCommon::Shader::CompilerSettings compiler_settings, - const VideoCommon::Shader::Registry& registry, VAddr cpu_addr); + CompilerSettings compiler_settings, const Registry& registry, + VAddr cpu_addr); void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler, @@ -97,7 +106,7 @@ private: void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); /// Check our worker queue to see if we have any work queued already - bool HasWorkQueued(); + [[nodiscard]] bool HasWorkQueued() const; struct WorkerParams { Backend backend; @@ -108,8 +117,8 @@ private: std::vector<u64> code; std::vector<u64> code_b; u32 main_offset; - VideoCommon::Shader::CompilerSettings compiler_settings; - std::optional<VideoCommon::Shader::Registry> registry; + CompilerSettings compiler_settings; + std::optional<Registry> registry; VAddr cpu_address; // For Vulkan @@ -125,13 +134,13 @@ private: }; std::condition_variable cv; - std::mutex queue_mutex; - std::shared_mutex completed_mutex; + mutable std::mutex queue_mutex; + mutable std::shared_mutex completed_mutex; std::atomic<bool> is_thread_exiting{}; std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; std::vector<std::thread> worker_threads; std::queue<WorkerParams> pending_queue; - std::vector<AsyncShaders::Result> finished_work; + std::vector<Result> finished_work; Core::Frontend::EmuWindow& emu_window; }; diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 336397cdb..4c8971615 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -547,13 +547,13 @@ bool TryQuery(CFGRebuildState& state) { gather_labels(q2.ssy_stack, state.ssy_labels, block); gather_labels(q2.pbk_stack, state.pbk_labels, block); if (std::holds_alternative<SingleBranch>(*block.branch)) { - const auto branch = std::get_if<SingleBranch>(block.branch.get()); + auto* branch = std::get_if<SingleBranch>(block.branch.get()); if (!branch->condition.IsUnconditional()) { q2.address = block.end + 1; state.queries.push_back(q2); } - Query conditional_query{q2}; + auto& conditional_query = state.queries.emplace_back(q2); if (branch->is_sync) { if (branch->address == unassigned_branch) { branch->address = conditional_query.ssy_stack.top(); @@ -567,21 +567,21 @@ bool TryQuery(CFGRebuildState& state) { conditional_query.pbk_stack.pop(); } conditional_query.address = branch->address; - state.queries.push_back(std::move(conditional_query)); return true; } - const auto multi_branch = std::get_if<MultiBranch>(block.branch.get()); + + const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get()); for (const auto& branch_case : multi_branch->branches) { - Query conditional_query{q2}; + auto& conditional_query = state.queries.emplace_back(q2); conditional_query.address = branch_case.address; - state.queries.push_back(std::move(conditional_query)); } + return true; } void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { - const auto get_expr = ([&](const Condition& cond) -> Expr { - Expr result{}; + const auto get_expr = [](const Condition& cond) -> Expr { + Expr result; if (cond.cc != ConditionCode::T) { result = MakeExpr<ExprCondCode>(cond.cc); } @@ -594,10 +594,10 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { } Expr extra = MakeExpr<ExprPredicate>(pred); if (negate) { - extra = MakeExpr<ExprNot>(extra); + extra = MakeExpr<ExprNot>(std::move(extra)); } if (result) { - return MakeExpr<ExprAnd>(extra, result); + return MakeExpr<ExprAnd>(std::move(extra), std::move(result)); } return extra; } @@ -605,9 +605,10 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { return result; } return MakeExpr<ExprBoolean>(true); - }); + }; + if (std::holds_alternative<SingleBranch>(*branch_info)) { - const auto branch = std::get_if<SingleBranch>(branch_info.get()); + const auto* branch = std::get_if<SingleBranch>(branch_info.get()); if (branch->address < 0) { if (branch->kill) { mm.InsertReturn(get_expr(branch->condition), true); @@ -619,7 +620,7 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) { mm.InsertGoto(get_expr(branch->condition), branch->address); return; } - const auto multi_branch = std::get_if<MultiBranch>(branch_info.get()); + const auto* multi_branch = std::get_if<MultiBranch>(branch_info.get()); for (const auto& branch_case : multi_branch->branches) { mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value), branch_case.address); diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp index a276aee44..88103fede 100644 --- a/src/video_core/shader/decode/arithmetic_half.cpp +++ b/src/video_core/shader/decode/arithmetic_half.cpp @@ -53,6 +53,9 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) { absolute_a = ((instr.value >> 44) & 1) != 0; absolute_b = ((instr.value >> 54) & 1) != 0; break; + default: + UNREACHABLE(); + break; } Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a); diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp index 73880db0e..2a30aab2b 100644 --- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp +++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp @@ -28,23 +28,26 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) { case OpCode::Id::IADD32I: { UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented"); - op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true); + op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true); - const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b); + Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b)); - SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc); - SetRegister(bb, instr.gpr0, value); + SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0); + SetRegister(bb, instr.gpr0, std::move(value)); break; } case OpCode::Id::LOP32I: { - if (instr.alu.lop32i.invert_a) - op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a); + if (instr.alu.lop32i.invert_a) { + op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a)); + } - if (instr.alu.lop32i.invert_b) - op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b); + if (instr.alu.lop32i.invert_b) { + op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b)); + } - WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b, - PredicateResultMode::None, Pred::UnusedIndex, instr.op_32.generates_cc); + WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a), + std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex, + instr.op_32.generates_cc != 0); break; } default: @@ -58,14 +61,14 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) { void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a, Node op_b, PredicateResultMode predicate_mode, Pred predicate, bool sets_cc) { - const Node result = [&]() { + Node result = [&] { switch (logic_op) { case LogicOperation::And: - return Operation(OperationCode::IBitwiseAnd, PRECISE, op_a, op_b); + return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b)); case LogicOperation::Or: - return Operation(OperationCode::IBitwiseOr, PRECISE, op_a, op_b); + return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b)); case LogicOperation::Xor: - return Operation(OperationCode::IBitwiseXor, PRECISE, op_a, op_b); + return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b)); case LogicOperation::PassB: return op_b; default: @@ -84,8 +87,8 @@ void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation return; case PredicateResultMode::NotZero: { // Set the predicate to true if the result is not zero. - const Node compare = Operation(OperationCode::LogicalINotEqual, result, Immediate(0)); - SetPredicate(bb, static_cast<u64>(predicate), compare); + Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0)); + SetPredicate(bb, static_cast<u64>(predicate), std::move(compare)); break; } default: diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index e75ca4fdb..618d309d2 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -119,6 +119,8 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, return descriptor.r_type; } break; + default: + break; } UNIMPLEMENTED_MSG("Texture format not implemented={}", format); return ComponentType::FLOAT; @@ -220,9 +222,10 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) { return (component == 0 || component == 1) ? 8 : 0; case TextureFormat::G4R4: return (component == 0 || component == 1) ? 4 : 0; + default: + UNIMPLEMENTED_MSG("Texture format not implemented={}", format); + return 0; } - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return 0; } std::size_t GetImageComponentMask(TextureFormat format) { @@ -257,9 +260,10 @@ std::size_t GetImageComponentMask(TextureFormat format) { case TextureFormat::R8: case TextureFormat::R1: return std::size_t{R}; + default: + UNIMPLEMENTED_MSG("Texture format not implemented={}", format); + return std::size_t{R | G | B | A}; } - UNIMPLEMENTED_MSG("Texture format not implemented={}", format); - return std::size_t{R | G | B | A}; } std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { @@ -463,7 +467,10 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { return OperationCode::AtomicImageXor; case Tegra::Shader::ImageAtomicOperation::Exch: return OperationCode::AtomicImageExchange; + default: + break; } + break; default: break; } diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index e4739394d..e2bba88dd 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -386,7 +386,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { break; } case OpCode::Id::RED: { - UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32); + UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}", + static_cast<int>(instr.red.type.Value())); const auto [real_address, base_address, descriptor] = TrackGlobalMemory(bb, instr, true, true); if (!real_address || !base_address) { diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 29ebf65ba..4e932a4b6 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -292,33 +292,36 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { break; } - std::vector<Node> coords; - - // TODO: Add coordinates for different samplers once other texture types are implemented. - switch (texture_type) { - case TextureType::Texture1D: - coords.push_back(GetRegister(instr.gpr8)); - break; - case TextureType::Texture2D: - coords.push_back(GetRegister(instr.gpr8.Value() + 0)); - coords.push_back(GetRegister(instr.gpr8.Value() + 1)); - break; - default: - UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<int>(texture_type)); + const u64 base_index = is_array ? 1 : 0; + const u64 num_components = [texture_type] { + switch (texture_type) { + case TextureType::Texture1D: + return 1; + case TextureType::Texture2D: + return 2; + case TextureType::TextureCube: + return 3; + default: + UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<int>(texture_type)); + return 2; + } + }(); + // TODO: What's the array component used for? - // Fallback to interpreting as a 2D texture for now - coords.push_back(GetRegister(instr.gpr8.Value() + 0)); - coords.push_back(GetRegister(instr.gpr8.Value() + 1)); + std::vector<Node> coords; + coords.reserve(num_components); + for (u64 component = 0; component < num_components; ++component) { + coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component)); } + u32 indexer = 0; for (u32 element = 0; element < 2; ++element) { if (!instr.tmml.IsComponentEnabled(element)) { continue; } - auto params = coords; MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var}; - const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params)); - SetTemporary(bb, indexer++, value); + Node value = Operation(OperationCode::TextureQueryLod, meta, coords); + SetTemporary(bb, indexer++, std::move(value)); } for (u32 i = 0; i < indexer; ++i) { SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); @@ -763,7 +766,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) { const auto texture_type{instr.tld.texture_type}; - const bool is_array{instr.tld.is_array}; + const bool is_array{instr.tld.is_array != 0}; const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL}; const std::size_t coord_count{GetCoordCount(texture_type)}; diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp index 5071c83ca..e18ccba8e 100644 --- a/src/video_core/shader/memory_util.cpp +++ b/src/video_core/shader/memory_util.cpp @@ -16,11 +16,10 @@ namespace VideoCommon::Shader { -GPUVAddr GetShaderAddress(Core::System& system, +GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) { - const auto& gpu{system.GPU().Maxwell3D()}; - const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]}; - return gpu.regs.code_address.CodeAddress() + shader_config.offset; + const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]}; + return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset; } bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h index be90d24fd..4624d38e6 100644 --- a/src/video_core/shader/memory_util.h +++ b/src/video_core/shader/memory_util.h @@ -11,10 +11,6 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" -namespace Core { -class System; -} - namespace Tegra { class MemoryManager; } @@ -27,7 +23,7 @@ constexpr u32 STAGE_MAIN_OFFSET = 10; constexpr u32 KERNEL_MAIN_OFFSET = 0; /// Gets the address for the specified shader stage program -GPUVAddr GetShaderAddress(Core::System& system, +GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d, Tegra::Engines::Maxwell3D::Regs::ShaderProgram program); /// Gets if the current instruction offset is a scheduler instruction diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp index cdf274e54..148d91fcb 100644 --- a/src/video_core/shader/registry.cpp +++ b/src/video_core/shader/registry.cpp @@ -24,44 +24,45 @@ GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterfac if (shader_stage == ShaderType::Compute) { return {}; } - auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine); - - GraphicsInfo info; - info.tfb_layouts = graphics.regs.tfb_layouts; - info.tfb_varying_locs = graphics.regs.tfb_varying_locs; - info.primitive_topology = graphics.regs.draw.topology; - info.tessellation_primitive = graphics.regs.tess_mode.prim; - info.tessellation_spacing = graphics.regs.tess_mode.spacing; - info.tfb_enabled = graphics.regs.tfb_enabled; - info.tessellation_clockwise = graphics.regs.tess_mode.cw; - return info; + + auto& graphics = dynamic_cast<Tegra::Engines::Maxwell3D&>(engine); + + return { + .tfb_layouts = graphics.regs.tfb_layouts, + .tfb_varying_locs = graphics.regs.tfb_varying_locs, + .primitive_topology = graphics.regs.draw.topology, + .tessellation_primitive = graphics.regs.tess_mode.prim, + .tessellation_spacing = graphics.regs.tess_mode.spacing, + .tfb_enabled = graphics.regs.tfb_enabled != 0, + .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0, + }; } ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { if (shader_stage != ShaderType::Compute) { return {}; } - auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine); + + auto& compute = dynamic_cast<Tegra::Engines::KeplerCompute&>(engine); const auto& launch = compute.launch_description; - ComputeInfo info; - info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}; - info.local_memory_size_in_words = launch.local_pos_alloc; - info.shared_memory_size_in_words = launch.shared_alloc; - return info; + return { + .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}, + .shared_memory_size_in_words = launch.shared_alloc, + .local_memory_size_in_words = launch.local_pos_alloc, + }; } } // Anonymous namespace -Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info) +Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info) : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} -Registry::Registry(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface& engine) - : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()}, - graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo( - shader_stage, engine)} {} +Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_) + : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()}, + graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo( + shader_stage, engine_)} {} Registry::~Registry() = default; @@ -113,8 +114,7 @@ std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler return value; } -std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, - u32 offset) { +std::optional<SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) { const std::pair key = {buffer, offset}; const auto iter = bindless_samplers.find(key); if (iter != bindless_samplers.end()) { diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h index 231206765..4bebefdde 100644 --- a/src/video_core/shader/registry.h +++ b/src/video_core/shader/registry.h @@ -94,7 +94,7 @@ public: explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); explicit Registry(Tegra::Engines::ShaderType shader_stage, - Tegra::Engines::ConstBufferEngineInterface& engine); + Tegra::Engines::ConstBufferEngineInterface& engine_); ~Registry(); diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index d5ed81442..6be3ea92b 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -205,12 +205,12 @@ std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); const auto& found = result.first; if (!found) { - return {}; + return std::nullopt; } if (const auto immediate = std::get_if<ImmediateNode>(&*found)) { return immediate->GetValue(); } - return {}; + return std::nullopt; } std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code, diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index dfcf36e0b..b44c09d71 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -115,20 +115,24 @@ std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap( if (gpu_addr == candidate_gpu_addr) { return {{0, 0}}; } + if (candidate_gpu_addr < gpu_addr) { - return {}; + return std::nullopt; } + const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)}; const auto layer{static_cast<u32>(relative_address / layer_size)}; if (layer >= params.depth) { - return {}; + return std::nullopt; } + const GPUVAddr mipmap_address = relative_address - layer_size * layer; const auto mipmap_it = Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); if (mipmap_it == mipmap_offsets.end()) { - return {}; + return std::nullopt; } + const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))}; return std::make_pair(layer, level); } diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index e614a92df..e8515321b 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -163,13 +163,11 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl return params; } -SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) { - const auto& regs = system.GPU().Maxwell3D().regs; - +SurfaceParams SurfaceParams::CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d) { + const auto& regs = maxwell3d.regs; const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U); const bool is_layered = regs.zeta_layers > 1 && block_depth == 0; const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format); - return { .is_tiled = regs.zeta.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear, @@ -191,8 +189,9 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) { }; } -SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) { - const auto& config{system.GPU().Maxwell3D().regs.rt[index]}; +SurfaceParams SurfaceParams::CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d, + std::size_t index) { + const auto& config{maxwell3d.regs.rt[index]}; SurfaceParams params; params.is_tiled = config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 118aa689e..4466c3c34 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -33,10 +33,11 @@ public: const VideoCommon::Shader::Image& entry); /// Creates SurfaceCachedParams for a depth buffer configuration. - static SurfaceParams CreateForDepthBuffer(Core::System& system); + static SurfaceParams CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d); /// Creates SurfaceCachedParams from a framebuffer configuration. - static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index); + static SurfaceParams CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d, + std::size_t index); /// Creates SurfaceCachedParams from a Fermi2D surface configuration. static SurfaceParams CreateForFermiCopySurface( diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 96c4e4cc2..ea835c59f 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -135,8 +135,7 @@ public: return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); } - const std::optional<VAddr> cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); if (!cpu_addr) { return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); } @@ -160,8 +159,7 @@ public: if (!gpu_addr) { return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); } - const std::optional<VAddr> cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); if (!cpu_addr) { return GetNullSurface(SurfaceParams::ExpectedTarget(entry)); } @@ -183,11 +181,11 @@ public: TView GetDepthBufferSurface(bool preserve_contents) { std::lock_guard lock{mutex}; - auto& maxwell3d = system.GPU().Maxwell3D(); - if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { + auto& dirty = maxwell3d.dirty; + if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) { return depth_buffer.view; } - maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false; + dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false; const auto& regs{maxwell3d.regs}; const auto gpu_addr{regs.zeta.Address()}; @@ -195,13 +193,12 @@ public: SetEmptyDepthBuffer(); return {}; } - const std::optional<VAddr> cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); if (!cpu_addr) { SetEmptyDepthBuffer(); return {}; } - const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)}; + const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)}; auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true); if (depth_buffer.target) depth_buffer.target->MarkAsRenderTarget(false, NO_RT); @@ -215,7 +212,6 @@ public: TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { std::lock_guard lock{mutex}; ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); - auto& maxwell3d = system.GPU().Maxwell3D(); if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) { return render_targets[index].view; } @@ -235,15 +231,14 @@ public: return {}; } - const std::optional<VAddr> cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); if (!cpu_addr) { SetEmptyColorBuffer(index); return {}; } auto surface_view = - GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index), + GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index), preserve_contents, true); if (render_targets[index].target) { auto& surface = render_targets[index].target; @@ -300,9 +295,8 @@ public: const GPUVAddr dst_gpu_addr = dst_config.Address(); DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr); - const auto& memory_manager = system.GPU().MemoryManager(); - const std::optional<VAddr> dst_cpu_addr = memory_manager.GpuToCpuAddress(dst_gpu_addr); - const std::optional<VAddr> src_cpu_addr = memory_manager.GpuToCpuAddress(src_gpu_addr); + const std::optional<VAddr> dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr); + const std::optional<VAddr> src_cpu_addr = gpu_memory.GpuToCpuAddress(src_gpu_addr); std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false); TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second; ImageBlit(src_surface, dst_surface.second, copy_config); @@ -358,9 +352,11 @@ public: } protected: - explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - bool is_astc_supported) - : system{system}, is_astc_supported{is_astc_supported}, rasterizer{rasterizer} { + explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_, + bool is_astc_supported_) + : is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + gpu_memory{gpu_memory_} { for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { SetEmptyColorBuffer(i); } @@ -395,7 +391,7 @@ protected: virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0; void ManageRenderTargetUnregister(TSurface& surface) { - auto& dirty = system.GPU().Maxwell3D().dirty; + auto& dirty = maxwell3d.dirty; const u32 index = surface->GetRenderTarget(); if (index == DEPTH_RT) { dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true; @@ -408,8 +404,7 @@ protected: void Register(TSurface surface) { const GPUVAddr gpu_addr = surface->GetGpuAddr(); const std::size_t size = surface->GetSizeInBytes(); - const std::optional<VAddr> cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); if (!cpu_addr) { LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}", gpu_addr); @@ -459,7 +454,6 @@ protected: return new_surface; } - Core::System& system; const bool is_astc_supported; private: @@ -954,8 +948,7 @@ private: * @param params The parameters on the candidate surface. **/ Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) { - const std::optional<VAddr> cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); if (!cpu_addr) { Deduction result{}; @@ -1112,7 +1105,7 @@ private: void LoadSurface(const TSurface& surface) { staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); - surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache); + surface->LoadBuffer(gpu_memory, staging_cache); surface->UploadTexture(staging_cache.GetBuffer(0)); surface->MarkAsModified(false, Tick()); } @@ -1123,7 +1116,7 @@ private: } staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes()); surface->DownloadTexture(staging_cache.GetBuffer(0)); - surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache); + surface->FlushBuffer(gpu_memory, staging_cache); surface->MarkAsModified(false, Tick()); } @@ -1253,6 +1246,8 @@ private: } VideoCore::RasterizerInterface& rasterizer; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::MemoryManager& gpu_memory; FormatLookupTable format_lookup_table; FormatCompatibility format_compatibility; diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 45f360bdd..a14df06a3 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <memory> + #include "common/logging/log.h" #include "core/core.h" #include "core/settings.h" @@ -16,37 +17,49 @@ #include "video_core/video_core.h" namespace { -std::unique_ptr<VideoCore::RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window, - Core::System& system, - Core::Frontend::GraphicsContext& context) { + +std::unique_ptr<VideoCore::RendererBase> CreateRenderer( + Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, + std::unique_ptr<Core::Frontend::GraphicsContext> context) { + auto& telemetry_session = system.TelemetrySession(); + auto& cpu_memory = system.Memory(); + switch (Settings::values.renderer_backend.GetValue()) { case Settings::RendererBackend::OpenGL: - return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system, context); + return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window, cpu_memory, + gpu, std::move(context)); #ifdef HAS_VULKAN case Settings::RendererBackend::Vulkan: - return std::make_unique<Vulkan::RendererVulkan>(emu_window, system); + return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, cpu_memory, + gpu, std::move(context)); #endif default: return nullptr; } } + } // Anonymous namespace namespace VideoCore { std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { + std::unique_ptr<Tegra::GPU> gpu; + if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { + gpu = std::make_unique<VideoCommon::GPUAsynch>(system); + } else { + gpu = std::make_unique<VideoCommon::GPUSynch>(system); + } + auto context = emu_window.CreateSharedContext(); const auto scope = context->Acquire(); - auto renderer = CreateRenderer(emu_window, system, *context); + + auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context)); if (!renderer->Init()) { return nullptr; } - if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { - return std::make_unique<VideoCommon::GPUAsynch>(system, std::move(renderer), - std::move(context)); - } - return std::make_unique<VideoCommon::GPUSynch>(system, std::move(renderer), std::move(context)); + gpu->BindRenderer(std::move(renderer)); + return gpu; } u16 GetResolutionScaleFactor(const RendererBase& renderer) { |