diff options
Diffstat (limited to 'src/video_core')
61 files changed, 632 insertions, 384 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3cd896a0f..d85f1e9d1 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,3 +1,5 @@ +add_subdirectory(host_shaders) + add_library(video_core STATIC buffer_cache/buffer_block.h buffer_cache/buffer_cache.h @@ -244,6 +246,9 @@ create_target_directory_groups(video_core) target_link_libraries(video_core PUBLIC common core) target_link_libraries(video_core PRIVATE glad xbyak) +add_dependencies(video_core host_shaders) +target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) + if (ENABLE_VULKAN) target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) target_compile_definitions(video_core PRIVATE HAS_VULKAN) diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index ff10ff40d..6e50661a3 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -10,7 +10,13 @@ namespace Tegra::Engines { -Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} +Fermi2D::Fermi2D() = default; + +Fermi2D::~Fermi2D() = default; + +void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { + rasterizer = &rasterizer_; +} void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { ASSERT_MSG(method < Regs::NUM_REGS, @@ -87,7 +93,7 @@ void Fermi2D::HandleSurfaceCopy() { copy_config.src_rect = src_rect; copy_config.dst_rect = dst_rect; - if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) { + if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) { UNIMPLEMENTED(); } } diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h index 8f37d053f..213abfaae 100644 --- a/src/video_core/engines/fermi_2d.h +++ b/src/video_core/engines/fermi_2d.h @@ -34,8 +34,11 @@ namespace Tegra::Engines { class Fermi2D final : public EngineInterface { public: - explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer); - ~Fermi2D() = default; + explicit Fermi2D(); + ~Fermi2D(); + + /// Binds a rasterizer to this engine. + void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); /// Write the value to the register identified by method. void CallMethod(u32 method, u32 method_argument, bool is_last_call) override; @@ -149,7 +152,7 @@ public: }; private: - VideoCore::RasterizerInterface& rasterizer; + VideoCore::RasterizerInterface* rasterizer; /// Performs the copy from the source surface to the destination surface as configured in the /// registers. diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index a82b06a38..898370739 100644 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -16,14 +16,15 @@ namespace Tegra::Engines { -KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - MemoryManager& memory_manager) - : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{ - memory_manager, - regs.upload} {} +KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_) + : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {} KeplerCompute::~KeplerCompute() = default; +void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { + rasterizer = &rasterizer_; +} + void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid KeplerCompute register, increase the size of the Regs structure"); @@ -104,11 +105,11 @@ SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const { } VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() { - return rasterizer.AccessGuestDriverProfile(); + return rasterizer->AccessGuestDriverProfile(); } const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const { - return rasterizer.AccessGuestDriverProfile(); + return rasterizer->AccessGuestDriverProfile(); } void KeplerCompute::ProcessLaunch() { @@ -119,7 +120,7 @@ void KeplerCompute::ProcessLaunch() { const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start; LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr); - rasterizer.DispatchCompute(code_addr); + rasterizer->DispatchCompute(code_addr); } Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index b7f668d88..7f2500aab 100644 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -42,10 +42,12 @@ namespace Tegra::Engines { class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface { public: - explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - MemoryManager& memory_manager); + explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); ~KeplerCompute(); + /// Binds a rasterizer to this engine. + void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); + static constexpr std::size_t NumConstBuffers = 8; struct Regs { @@ -230,11 +232,6 @@ public: const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override; private: - Core::System& system; - VideoCore::RasterizerInterface& rasterizer; - MemoryManager& memory_manager; - Upload::State upload_state; - void ProcessLaunch(); /// Retrieves information about a specific TIC entry from the TIC buffer. @@ -242,6 +239,11 @@ private: /// Retrieves information about a specific TSC entry from the TSC buffer. Texture::TSCEntry GetTSCEntry(u32 tsc_index) const; + + Core::System& system; + MemoryManager& memory_manager; + VideoCore::RasterizerInterface* rasterizer = nullptr; + Upload::State upload_state; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index c01436295..33854445f 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -22,14 +22,19 @@ using VideoCore::QueryType; /// First register id that is actually a Macro call. constexpr u32 MacroRegistersStart = 0xE00; -Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - MemoryManager& memory_manager) - : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, - macro_engine{GetMacroEngine(*this)}, upload_state{memory_manager, regs.upload} { +Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_) + : system{system_}, memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)}, + upload_state{memory_manager, regs.upload} { dirty.flags.flip(); InitializeRegisterDefaults(); } +Maxwell3D::~Maxwell3D() = default; + +void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { + rasterizer = &rasterizer_; +} + void Maxwell3D::InitializeRegisterDefaults() { // Initializes registers to their default values - what games expect them to be at boot. This is // for certain registers that may not be explicitly set by games. @@ -192,7 +197,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) { switch (method) { case MAXWELL3D_REG_INDEX(wait_for_idle): { - rasterizer.WaitForIdle(); + rasterizer->WaitForIdle(); break; } case MAXWELL3D_REG_INDEX(shadow_ram_control): { @@ -402,7 +407,7 @@ void Maxwell3D::FlushMMEInlineDraw() { const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed; if (ShouldExecute()) { - rasterizer.Draw(is_indexed, true); + rasterizer->Draw(is_indexed, true); } // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if @@ -465,7 +470,7 @@ void Maxwell3D::ProcessQueryGet() { switch (regs.query.query_get.operation) { case Regs::QueryOperation::Release: if (regs.query.query_get.fence == 1) { - rasterizer.SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence); + rasterizer->SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence); } else { StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); } @@ -533,7 +538,7 @@ void Maxwell3D::ProcessQueryCondition() { void Maxwell3D::ProcessCounterReset() { switch (regs.counter_reset) { case Regs::CounterReset::SampleCnt: - rasterizer.ResetCounter(QueryType::SamplesPassed); + rasterizer->ResetCounter(QueryType::SamplesPassed); break; default: LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}", @@ -547,7 +552,7 @@ void Maxwell3D::ProcessSyncPoint() { const u32 increment = regs.sync_info.increment.Value(); [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value(); if (increment) { - rasterizer.SignalSyncPoint(sync_point); + rasterizer->SignalSyncPoint(sync_point); } } @@ -570,7 +575,7 @@ void Maxwell3D::DrawArrays() { const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count}; if (ShouldExecute()) { - rasterizer.Draw(is_indexed, false); + rasterizer->Draw(is_indexed, false); } // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if @@ -590,8 +595,8 @@ std::optional<u64> Maxwell3D::GetQueryResult() { return 0; case Regs::QuerySelect::SamplesPassed: // Deferred. - rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed, - system.GPU().GetTicks()); + rasterizer->Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed, + system.GPU().GetTicks()); return {}; default: LOG_DEBUG(HW_GPU, "Unimplemented query select type {}", @@ -718,7 +723,7 @@ void Maxwell3D::ProcessClearBuffers() { regs.clear_buffers.R == regs.clear_buffers.B && regs.clear_buffers.R == regs.clear_buffers.A); - rasterizer.Clear(); + rasterizer->Clear(); } u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const { @@ -752,11 +757,11 @@ SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const { } VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() { - return rasterizer.AccessGuestDriverProfile(); + return rasterizer->AccessGuestDriverProfile(); } const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const { - return rasterizer.AccessGuestDriverProfile(); + return rasterizer->AccessGuestDriverProfile(); } } // namespace Tegra::Engines diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index ef1618990..bc289c55d 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -51,9 +51,11 @@ namespace Tegra::Engines { class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface { public: - explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - MemoryManager& memory_manager); - ~Maxwell3D() = default; + explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager); + ~Maxwell3D(); + + /// Binds a rasterizer to this engine. + void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); /// Register structure of the Maxwell3D engine. /// TODO(Subv): This structure will need to be made bigger as more registers are discovered. @@ -647,7 +649,7 @@ public: GetX() + GetWidth(), // right GetY() // bottom }; - }; + } f32 GetX() const { return std::max(0.0f, translate_x - std::fabs(scale_x)); @@ -1418,12 +1420,12 @@ public: return execute_on; } - VideoCore::RasterizerInterface& GetRasterizer() { - return rasterizer; + VideoCore::RasterizerInterface& Rasterizer() { + return *rasterizer; } - const VideoCore::RasterizerInterface& GetRasterizer() const { - return rasterizer; + const VideoCore::RasterizerInterface& Rasterizer() const { + return *rasterizer; } /// Notify a memory write has happened. @@ -1460,11 +1462,10 @@ private: void InitializeRegisterDefaults(); Core::System& system; - - VideoCore::RasterizerInterface& rasterizer; - MemoryManager& memory_manager; + VideoCore::RasterizerInterface* rasterizer = nullptr; + /// Start offsets of each macro in macro_memory std::array<u32, 0x80> macro_positions = {}; diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index a2d3d7823..e88290754 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -94,7 +94,8 @@ void MaxwellDMA::CopyPitchToPitch() { } void MaxwellDMA::CopyBlockLinearToPitch() { - ASSERT(regs.src_params.block_size.depth == 0); + UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0); + UNIMPLEMENTED_IF(regs.src_params.layer != 0); // Optimized path for micro copies. const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; @@ -123,17 +124,12 @@ void MaxwellDMA::CopyBlockLinearToPitch() { write_buffer.resize(dst_size); } - if (Settings::IsGPULevelExtreme()) { - memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); - memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); - } else { - memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size); - memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); - } + memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); + memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, width, bytes_per_pixel, - read_buffer.data() + src_layer_size * src_params.layer, write_buffer.data(), - block_height, src_params.origin.x, src_params.origin.y); + block_height, src_params.origin.x, src_params.origin.y, write_buffer.data(), + read_buffer.data()); memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); } @@ -198,7 +194,6 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { if (read_buffer.size() < src_size) { read_buffer.resize(src_size); } - if (write_buffer.size() < dst_size) { write_buffer.resize(dst_size); } @@ -212,8 +207,8 @@ void MaxwellDMA::FastCopyBlockLinearToPitch() { } UnswizzleSubrect(regs.line_length_in, regs.line_count, regs.pitch_out, regs.src_params.width, - bytes_per_pixel, read_buffer.data(), write_buffer.data(), - regs.src_params.block_size.height, pos_x, pos_y); + bytes_per_pixel, regs.src_params.block_size.height, pos_x, pos_y, + write_buffer.data(), read_buffer.data()); memory_manager.WriteBlock(regs.offset_out, write_buffer.data(), dst_size); } diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index 8b2a6a42c..06cc12d5a 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h @@ -5,15 +5,10 @@ #pragma once #include <algorithm> -#include <array> -#include <memory> #include <queue> -#include "common/assert.h" #include "common/common_types.h" #include "core/core.h" -#include "core/memory.h" -#include "core/settings.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 512578c8b..acb6e6d46 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -27,21 +27,28 @@ namespace Tegra { MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); -GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, bool is_async) - : system{system}, renderer{std::move(renderer_)}, is_async{is_async} { - auto& rasterizer{renderer->Rasterizer()}; - memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); - dma_pusher = std::make_unique<Tegra::DmaPusher>(system, *this); - maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); - fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer); - kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); - maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager); - kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); - shader_notify = std::make_unique<VideoCore::ShaderNotify>(); -} +GPU::GPU(Core::System& system_, bool is_async_) + : system{system_}, dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)}, + memory_manager{std::make_unique<Tegra::MemoryManager>(system)}, + maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)}, + fermi_2d{std::make_unique<Engines::Fermi2D>()}, + kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)}, + maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)}, + kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)}, + shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {} GPU::~GPU() = default; +void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { + renderer = std::move(renderer_); + + VideoCore::RasterizerInterface& rasterizer = renderer->Rasterizer(); + memory_manager->BindRasterizer(rasterizer); + maxwell_3d->BindRasterizer(rasterizer); + fermi_2d->BindRasterizer(rasterizer); + kepler_compute->BindRasterizer(rasterizer); +} + Engines::Maxwell3D& GPU::Maxwell3D() { return *maxwell_3d; } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index ebfc7b0c7..c7d11deb2 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -142,11 +142,6 @@ class MemoryManager; class GPU { public: - explicit GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, - bool is_async); - - virtual ~GPU(); - struct MethodCall { u32 method{}; u32 argument{}; @@ -162,6 +157,12 @@ public: method_count(method_count) {} }; + explicit GPU(Core::System& system, bool is_async); + virtual ~GPU(); + + /// Binds a renderer to the GPU. + void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer); + /// Calls a GPU method. void CallMethod(const MethodCall& method_call); @@ -345,8 +346,8 @@ private: bool ExecuteMethodOnEngine(u32 method); protected: - std::unique_ptr<Tegra::DmaPusher> dma_pusher; Core::System& system; + std::unique_ptr<Tegra::DmaPusher> dma_pusher; std::unique_ptr<VideoCore::RendererBase> renderer; private: diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 7b855f63e..70a3d5738 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -10,16 +10,14 @@ namespace VideoCommon { -GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, - std::unique_ptr<Core::Frontend::GraphicsContext>&& context) - : GPU(system, std::move(renderer_), true), gpu_thread{system}, - cpu_context(renderer->GetRenderWindow().CreateSharedContext()), - gpu_context(std::move(context)) {} +GPUAsynch::GPUAsynch(Core::System& system) : GPU{system, true}, gpu_thread{system} {} GPUAsynch::~GPUAsynch() = default; void GPUAsynch::Start() { - gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher); + gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher); + cpu_context = renderer->GetRenderWindow().CreateSharedContext(); + cpu_context->MakeCurrent(); } void GPUAsynch::ObtainContext() { diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 15e9f1d38..f89c855a5 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -20,8 +20,7 @@ namespace VideoCommon { /// Implementation of GPU interface that runs the GPU asynchronously class GPUAsynch final : public Tegra::GPU { public: - explicit GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, - std::unique_ptr<Core::Frontend::GraphicsContext>&& context); + explicit GPUAsynch(Core::System& system); ~GPUAsynch() override; void Start() override; @@ -42,7 +41,6 @@ protected: private: GPUThread::ThreadManager gpu_thread; std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context; - std::unique_ptr<Core::Frontend::GraphicsContext> gpu_context; }; } // namespace VideoCommon diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index aaeb9811d..1ca47ddef 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp @@ -7,20 +7,18 @@ namespace VideoCommon { -GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, - std::unique_ptr<Core::Frontend::GraphicsContext>&& context) - : GPU(system, std::move(renderer), false), context{std::move(context)} {} +GPUSynch::GPUSynch(Core::System& system) : GPU{system, false} {} GPUSynch::~GPUSynch() = default; void GPUSynch::Start() {} void GPUSynch::ObtainContext() { - context->MakeCurrent(); + renderer->Context().MakeCurrent(); } void GPUSynch::ReleaseContext() { - context->DoneCurrent(); + renderer->Context().DoneCurrent(); } void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 762c20aa5..297258cb1 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -19,8 +19,7 @@ namespace VideoCommon { /// Implementation of GPU interface that runs the GPU synchronously class GPUSynch final : public Tegra::GPU { public: - explicit GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer, - std::unique_ptr<Core::Frontend::GraphicsContext>&& context); + explicit GPUSynch(Core::System& system); ~GPUSynch() override; void Start() override; @@ -36,9 +35,6 @@ public: protected: void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, [[maybe_unused]] u32 value) const override {} - -private: - std::unique_ptr<Core::Frontend::GraphicsContext> context; }; } // namespace VideoCommon diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt new file mode 100644 index 000000000..aa62363a7 --- /dev/null +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -0,0 +1,43 @@ +set(SHADER_FILES + opengl_present.frag + opengl_present.vert +) + +set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include) +set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE) + +set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders) +add_custom_command( + OUTPUT + ${SHADER_DIR} + COMMAND + ${CMAKE_COMMAND} -E make_directory ${SHADER_DIR} +) + +set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in) +set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake) + +foreach(FILENAME IN ITEMS ${SHADER_FILES}) + string(REPLACE "." "_" SHADER_NAME ${FILENAME}) + set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME}) + set(HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h) + add_custom_command( + OUTPUT + ${HEADER_FILE} + COMMAND + ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${HEADER_FILE} ${INPUT_FILE} + MAIN_DEPENDENCY + ${SOURCE_FILE} + DEPENDS + ${HEADER_GENERATOR} + ${INPUT_FILE} + ) + set(SHADER_HEADERS ${SHADER_HEADERS} ${HEADER_FILE}) +endforeach() + +add_custom_target(host_shaders + DEPENDS + ${SHADER_HEADERS} + SOURCES + ${SHADER_FILES} +) diff --git a/src/video_core/host_shaders/StringShaderHeader.cmake b/src/video_core/host_shaders/StringShaderHeader.cmake new file mode 100644 index 000000000..368bce0ed --- /dev/null +++ b/src/video_core/host_shaders/StringShaderHeader.cmake @@ -0,0 +1,11 @@ +set(SOURCE_FILE ${CMAKE_ARGV3}) +set(HEADER_FILE ${CMAKE_ARGV4}) +set(INPUT_FILE ${CMAKE_ARGV5}) + +get_filename_component(CONTENTS_NAME ${SOURCE_FILE} NAME) +string(REPLACE "." "_" CONTENTS_NAME ${CONTENTS_NAME}) +string(TOUPPER ${CONTENTS_NAME} CONTENTS_NAME) + +file(READ ${SOURCE_FILE} CONTENTS) + +configure_file(${INPUT_FILE} ${HEADER_FILE} @ONLY) diff --git a/src/video_core/host_shaders/opengl_present.frag b/src/video_core/host_shaders/opengl_present.frag new file mode 100644 index 000000000..8a4cb024b --- /dev/null +++ b/src/video_core/host_shaders/opengl_present.frag @@ -0,0 +1,10 @@ +#version 430 core + +layout (location = 0) in vec2 frag_tex_coord; +layout (location = 0) out vec4 color; + +layout (binding = 0) uniform sampler2D color_texture; + +void main() { + color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f); +} diff --git a/src/video_core/host_shaders/opengl_present.vert b/src/video_core/host_shaders/opengl_present.vert new file mode 100644 index 000000000..2235d31a4 --- /dev/null +++ b/src/video_core/host_shaders/opengl_present.vert @@ -0,0 +1,24 @@ +#version 430 core + +out gl_PerVertex { + vec4 gl_Position; +}; + +layout (location = 0) in vec2 vert_position; +layout (location = 1) in vec2 vert_tex_coord; +layout (location = 0) out vec2 frag_tex_coord; + +// This is a truncated 3x3 matrix for 2D transformations: +// The upper-left 2x2 submatrix performs scaling/rotation/mirroring. +// The third column performs translation. +// The third row could be used for projection, which we don't need in 2D. It hence is assumed to +// implicitly be [0, 0, 1] +layout (location = 0) uniform mat3x2 modelview_matrix; + +void main() { + // Multiply input position by the rotscale part of the matrix and then manually translate by + // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector + // to `vec3(vert_position.xy, 1.0)` + gl_Position = vec4(mat2(modelview_matrix) * vert_position + modelview_matrix[2], 0.0, 1.0); + frag_tex_coord = vert_tex_coord; +} diff --git a/src/video_core/host_shaders/source_shader.h.in b/src/video_core/host_shaders/source_shader.h.in new file mode 100644 index 000000000..ccdb0d2a9 --- /dev/null +++ b/src/video_core/host_shaders/source_shader.h.in @@ -0,0 +1,9 @@ +#pragma once + +#include <string_view> + +namespace HostShaders { + +constexpr std::string_view @CONTENTS_NAME@ = R"(@CONTENTS@)"; + +} // namespace HostShaders diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp index 0c9ff59a4..df00b57df 100644 --- a/src/video_core/macro/macro_hle.cpp +++ b/src/video_core/macro/macro_hle.cpp @@ -24,7 +24,7 @@ void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& maxwell3d.regs.index_array.first = parameters[4]; if (maxwell3d.ShouldExecute()) { - maxwell3d.GetRasterizer().Draw(true, true); + maxwell3d.Rasterizer().Draw(true, true); } maxwell3d.regs.index_array.count = 0; maxwell3d.mme_draw.instance_count = 0; @@ -42,7 +42,7 @@ void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& maxwell3d.mme_draw.instance_count = count; if (maxwell3d.ShouldExecute()) { - maxwell3d.GetRasterizer().Draw(false, true); + maxwell3d.Rasterizer().Draw(false, true); } maxwell3d.regs.vertex_buffer.count = 0; maxwell3d.mme_draw.instance_count = 0; @@ -65,7 +65,7 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>& maxwell3d.regs.draw.topology.Assign( static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0])); if (maxwell3d.ShouldExecute()) { - maxwell3d.GetRasterizer().Draw(true, true); + maxwell3d.Rasterizer().Draw(true, true); } maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base? maxwell3d.regs.index_array.count = 0; diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp index aa5256419..bd01fd1f2 100644 --- a/src/video_core/macro/macro_interpreter.cpp +++ b/src/video_core/macro/macro_interpreter.cpp @@ -34,7 +34,6 @@ void MacroInterpreterImpl::Execute(const std::vector<u32>& parameters, u32 metho this->parameters = std::make_unique<u32[]>(num_parameters); } std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32)); - this->num_parameters = num_parameters; // Execute the code until we hit an exit condition. bool keep_executing = true; diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 844164645..16b2aaa27 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -14,11 +14,15 @@ namespace Tegra { -MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer) - : system{system}, rasterizer{rasterizer}, page_table(page_table_size) {} +MemoryManager::MemoryManager(Core::System& system_) + : system{system_}, page_table(page_table_size) {} MemoryManager::~MemoryManager() = default; +void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) { + rasterizer = &rasterizer_; +} + GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) { u64 remaining_size{size}; for (u64 offset{}; offset < size; offset += page_size) { @@ -217,7 +221,7 @@ void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::siz // Flush must happen on the rasterizer interface, such that memory is always synchronous // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. - rasterizer.FlushRegion(src_addr, copy_amount); + rasterizer->FlushRegion(src_addr, copy_amount); system.Memory().ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); } @@ -266,7 +270,7 @@ void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, s // Invalidate must happen on the rasterizer interface, such that memory is always // synchronous when it is written (even when in asynchronous GPU mode). - rasterizer.InvalidateRegion(dest_addr, copy_amount); + rasterizer->InvalidateRegion(dest_addr, copy_amount); system.Memory().WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); } @@ -312,10 +316,10 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_add WriteBlockUnsafe(gpu_dest_addr, tmp_buffer.data(), size); } -bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) { +bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { const auto cpu_addr{GpuToCpuAddress(gpu_addr)}; if (!cpu_addr) { - return {}; + return false; } const std::size_t page{(*cpu_addr & Core::Memory::PAGE_MASK) + size}; return page <= Core::Memory::PAGE_SIZE; diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 681bd9588..53c8d122a 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -31,19 +31,19 @@ public: constexpr PageEntry(State state) : state{state} {} constexpr PageEntry(VAddr addr) : state{static_cast<State>(addr >> ShiftBits)} {} - constexpr bool IsUnmapped() const { + [[nodiscard]] constexpr bool IsUnmapped() const { return state == State::Unmapped; } - constexpr bool IsAllocated() const { + [[nodiscard]] constexpr bool IsAllocated() const { return state == State::Allocated; } - constexpr bool IsValid() const { + [[nodiscard]] constexpr bool IsValid() const { return !IsUnmapped() && !IsAllocated(); } - constexpr VAddr ToAddress() const { + [[nodiscard]] constexpr VAddr ToAddress() const { if (!IsValid()) { return {}; } @@ -51,7 +51,7 @@ public: return static_cast<VAddr>(state) << ShiftBits; } - constexpr PageEntry operator+(u64 offset) { + [[nodiscard]] constexpr PageEntry operator+(u64 offset) const { // If this is a reserved value, offsets do not apply if (!IsValid()) { return *this; @@ -68,19 +68,22 @@ static_assert(sizeof(PageEntry) == 4, "PageEntry is too large"); class MemoryManager final { public: - explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer); + explicit MemoryManager(Core::System& system); ~MemoryManager(); - std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; + /// Binds a renderer to the memory manager. + void BindRasterizer(VideoCore::RasterizerInterface& rasterizer); + + [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const; template <typename T> - T Read(GPUVAddr addr) const; + [[nodiscard]] T Read(GPUVAddr addr) const; template <typename T> void Write(GPUVAddr addr, T data); - u8* GetPointer(GPUVAddr addr); - const u8* GetPointer(GPUVAddr addr) const; + [[nodiscard]] u8* GetPointer(GPUVAddr addr); + [[nodiscard]] const u8* GetPointer(GPUVAddr addr) const; /** * ReadBlock and WriteBlock are full read and write operations over virtual @@ -109,24 +112,24 @@ public: /** * IsGranularRange checks if a gpu region can be simply read with a pointer. */ - bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size); + [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const; - GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size); - GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); - std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size); - GPUVAddr Allocate(std::size_t size, std::size_t align); + [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size); + [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); + [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size); + [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align); void Unmap(GPUVAddr gpu_addr, std::size_t size); private: - PageEntry GetPageEntry(GPUVAddr gpu_addr) const; + [[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const; void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size); GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size); - std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align) const; + [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align) const; void TryLockPage(PageEntry page_entry, std::size_t size); void TryUnlockPage(PageEntry page_entry, std::size_t size); - static constexpr std::size_t PageEntryIndex(GPUVAddr gpu_addr) { + [[nodiscard]] static constexpr std::size_t PageEntryIndex(GPUVAddr gpu_addr) { return (gpu_addr >> page_bits) & page_table_mask; } @@ -141,7 +144,7 @@ private: Core::System& system; - VideoCore::RasterizerInterface& rasterizer; + VideoCore::RasterizerInterface* rasterizer = nullptr; std::vector<PageEntry> page_table; }; diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp index dfb06e87e..a93a1732c 100644 --- a/src/video_core/renderer_base.cpp +++ b/src/video_core/renderer_base.cpp @@ -9,7 +9,9 @@ namespace VideoCore { -RendererBase::RendererBase(Core::Frontend::EmuWindow& window) : render_window{window} { +RendererBase::RendererBase(Core::Frontend::EmuWindow& window_, + std::unique_ptr<Core::Frontend::GraphicsContext> context_) + : render_window{window_}, context{std::move(context_)} { RefreshBaseSettings(); } diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 1d85219b6..649074acd 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -15,7 +15,8 @@ namespace Core::Frontend { class EmuWindow; -} +class GraphicsContext; +} // namespace Core::Frontend namespace VideoCore { @@ -25,14 +26,15 @@ struct RendererSettings { // Screenshot std::atomic<bool> screenshot_requested{false}; - void* screenshot_bits; + void* screenshot_bits{}; std::function<void()> screenshot_complete_callback; Layout::FramebufferLayout screenshot_framebuffer_layout; }; class RendererBase : NonCopyable { public: - explicit RendererBase(Core::Frontend::EmuWindow& window); + explicit RendererBase(Core::Frontend::EmuWindow& window, + std::unique_ptr<Core::Frontend::GraphicsContext> context); virtual ~RendererBase(); /// Initialize the renderer @@ -68,6 +70,14 @@ public: return *rasterizer; } + Core::Frontend::GraphicsContext& Context() { + return *context; + } + + const Core::Frontend::GraphicsContext& Context() const { + return *context; + } + Core::Frontend::EmuWindow& GetRenderWindow() { return render_window; } @@ -94,6 +104,7 @@ public: protected: Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle. std::unique_ptr<RasterizerInterface> rasterizer; + std::unique_ptr<Core::Frontend::GraphicsContext> context; f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer int m_current_frame = 0; ///< Current frame, should be set by the renderer diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index ec5421afa..3d2588dd2 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp @@ -4,16 +4,17 @@ #include "common/assert.h" +#include <glad/glad.h> + #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_fence_manager.h" namespace OpenGL { -GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed) - : VideoCommon::FenceBase(payload, is_stubbed), sync_object{} {} +GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed) : FenceBase(payload, is_stubbed) {} GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed) - : VideoCommon::FenceBase(address, payload, is_stubbed), sync_object{} {} + : FenceBase(address, payload, is_stubbed) {} GLInnerFence::~GLInnerFence() = default; diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h index c917b3343..1686cf5c8 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.h +++ b/src/video_core/renderer_opengl/gl_fence_manager.h @@ -5,7 +5,6 @@ #pragma once #include <memory> -#include <glad/glad.h> #include "common/common_types.h" #include "video_core/fence_manager.h" diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index cb284db77..4af5824cd 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -177,15 +177,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind } if (device.UseAsynchronousShaders()) { - // Max worker threads we should allow - constexpr u32 MAX_THREADS = 4; - // Deduce how many threads we can use - const u32 threads_used = std::thread::hardware_concurrency() / 4; - // Always allow at least 1 thread regardless of our settings - const auto max_worker_count = std::max(1U, threads_used); - // Don't use more than MAX_THREADS - const auto worker_count = std::min(max_worker_count, MAX_THREADS); - async_shaders.AllocateWorkers(worker_count); + async_shaders.AllocateWorkers(); } } diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index a787e27d2..0ebcec427 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <string_view> #include <utility> #include <glad/glad.h> #include "common/common_types.h" @@ -82,11 +83,13 @@ void OGLSampler::Release() { handle = 0; } -void OGLShader::Create(const char* source, GLenum type) { - if (handle != 0) +void OGLShader::Create(std::string_view source, GLenum type) { + if (handle != 0) { return; - if (source == nullptr) + } + if (source.empty()) { return; + } MICROPROFILE_SCOPE(OpenGL_ResourceCreation); handle = GLShader::LoadShader(source, type); diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index b05cb641c..f48398669 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -4,6 +4,7 @@ #pragma once +#include <string_view> #include <utility> #include <glad/glad.h> #include "common/common_types.h" @@ -127,7 +128,7 @@ public: return *this; } - void Create(const char* source, GLenum type); + void Create(std::string_view source, GLenum type); void Release(); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index eb49a36bf..a07d56ef0 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -22,6 +22,7 @@ #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_arb_decompiler.h" #include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 2dcc2b0eb..40c0877c1 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -73,7 +73,7 @@ ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default; ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default; -bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { +bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) { if (file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) { return false; } @@ -144,7 +144,7 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { return true; } -bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { +bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const { if (file.WriteObject(static_cast<u32>(type)) != 1 || file.WriteObject(static_cast<u32>(code.size())) != 1 || file.WriteObject(static_cast<u32>(code_b.size())) != 1) { @@ -214,20 +214,20 @@ std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTran // Skip games without title id const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0; if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) { - return {}; + return std::nullopt; } - FileUtil::IOFile file(GetTransferablePath(), "rb"); + Common::FS::IOFile file(GetTransferablePath(), "rb"); if (!file.IsOpen()) { LOG_INFO(Render_OpenGL, "No transferable shader cache found"); is_usable = true; - return {}; + return std::nullopt; } u32 version{}; if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it"); - return {}; + return std::nullopt; } if (version < NativeVersion) { @@ -235,12 +235,12 @@ std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTran file.Close(); InvalidateTransferable(); is_usable = true; - return {}; + return std::nullopt; } if (version > NativeVersion) { LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version " "of the emulator, skipping"); - return {}; + return std::nullopt; } // Version is valid, load the shaders @@ -249,7 +249,7 @@ std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTran ShaderDiskCacheEntry& entry = entries.emplace_back(); if (!entry.Load(file)) { LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping"); - return {}; + return std::nullopt; } } @@ -262,7 +262,7 @@ std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() return {}; } - FileUtil::IOFile file(GetPrecompiledPath(), "rb"); + Common::FS::IOFile file(GetPrecompiledPath(), "rb"); if (!file.IsOpen()) { LOG_INFO(Render_OpenGL, "No precompiled shader cache found"); return {}; @@ -279,7 +279,7 @@ std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() } std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile( - FileUtil::IOFile& file) { + Common::FS::IOFile& file) { // Read compressed file from disk and decompress to virtual precompiled cache file std::vector<u8> compressed(file.GetSize()); file.ReadBytes(compressed.data(), compressed.size()); @@ -290,12 +290,12 @@ std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::Lo ShaderCacheVersionHash file_hash{}; if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) { precompiled_cache_virtual_file_offset = 0; - return {}; + return std::nullopt; } if (GetShaderCacheVersionHash() != file_hash) { LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator"); precompiled_cache_virtual_file_offset = 0; - return {}; + return std::nullopt; } std::vector<ShaderDiskCachePrecompiled> entries; @@ -305,19 +305,20 @@ std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::Lo if (!LoadObjectFromPrecompiled(entry.unique_identifier) || !LoadObjectFromPrecompiled(entry.binary_format) || !LoadObjectFromPrecompiled(binary_size)) { - return {}; + return std::nullopt; } entry.binary.resize(binary_size); if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) { - return {}; + return std::nullopt; } } - return entries; + + return std::move(entries); } void ShaderDiskCacheOpenGL::InvalidateTransferable() { - if (!FileUtil::Delete(GetTransferablePath())) { + if (!Common::FS::Delete(GetTransferablePath())) { LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}", GetTransferablePath()); } @@ -328,7 +329,7 @@ void ShaderDiskCacheOpenGL::InvalidatePrecompiled() { // Clear virtaul precompiled cache file precompiled_cache_virtual_file.Resize(0); - if (!FileUtil::Delete(GetPrecompiledPath())) { + if (!Common::FS::Delete(GetPrecompiledPath())) { LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}", GetPrecompiledPath()); } } @@ -344,7 +345,7 @@ void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) { return; } - FileUtil::IOFile file = AppendTransferableFile(); + Common::FS::IOFile file = AppendTransferableFile(); if (!file.IsOpen()) { return; } @@ -386,15 +387,15 @@ void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint progra } } -FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { +Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const { if (!EnsureDirectories()) { return {}; } const auto transferable_path{GetTransferablePath()}; - const bool existed = FileUtil::Exists(transferable_path); + const bool existed = Common::FS::Exists(transferable_path); - FileUtil::IOFile file(transferable_path, "ab"); + Common::FS::IOFile file(transferable_path, "ab"); if (!file.IsOpen()) { LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}", transferable_path); return {}; @@ -426,7 +427,7 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size()); const auto precompiled_path{GetPrecompiledPath()}; - FileUtil::IOFile file(precompiled_path, "wb"); + Common::FS::IOFile file(precompiled_path, "wb"); if (!file.IsOpen()) { LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}", precompiled_path); @@ -440,24 +441,24 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { bool ShaderDiskCacheOpenGL::EnsureDirectories() const { const auto CreateDir = [](const std::string& dir) { - if (!FileUtil::CreateDir(dir)) { + if (!Common::FS::CreateDir(dir)) { LOG_ERROR(Render_OpenGL, "Failed to create directory={}", dir); return false; } return true; }; - return CreateDir(FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir)) && + return CreateDir(Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)) && CreateDir(GetBaseDir()) && CreateDir(GetTransferableDir()) && CreateDir(GetPrecompiledDir()); } std::string ShaderDiskCacheOpenGL::GetTransferablePath() const { - return FileUtil::SanitizePath(GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin"); + return Common::FS::SanitizePath(GetTransferableDir() + DIR_SEP_CHR + GetTitleID() + ".bin"); } std::string ShaderDiskCacheOpenGL::GetPrecompiledPath() const { - return FileUtil::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin"); + return Common::FS::SanitizePath(GetPrecompiledDir() + DIR_SEP_CHR + GetTitleID() + ".bin"); } std::string ShaderDiskCacheOpenGL::GetTransferableDir() const { @@ -469,7 +470,7 @@ std::string ShaderDiskCacheOpenGL::GetPrecompiledDir() const { } std::string ShaderDiskCacheOpenGL::GetBaseDir() const { - return FileUtil::GetUserPath(FileUtil::UserPath::ShaderDir) + DIR_SEP "opengl"; + return Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir) + DIR_SEP "opengl"; } std::string ShaderDiskCacheOpenGL::GetTitleID() const { diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index a79cef0e9..db2bb73bc 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -25,7 +25,7 @@ namespace Core { class System; } -namespace FileUtil { +namespace Common::FS { class IOFile; } @@ -38,9 +38,9 @@ struct ShaderDiskCacheEntry { ShaderDiskCacheEntry(); ~ShaderDiskCacheEntry(); - bool Load(FileUtil::IOFile& file); + bool Load(Common::FS::IOFile& file); - bool Save(FileUtil::IOFile& file) const; + bool Save(Common::FS::IOFile& file) const; bool HasProgramA() const { return !code.empty() && !code_b.empty(); @@ -97,10 +97,10 @@ public: private: /// Loads the transferable cache. Returns empty on failure. std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile( - FileUtil::IOFile& file); + Common::FS::IOFile& file); /// Opens current game's transferable file and write it's header if it doesn't exist - FileUtil::IOFile AppendTransferableFile() const; + Common::FS::IOFile AppendTransferableFile() const; /// Save precompiled header to precompiled_cache_in_memory void SavePrecompiledHeaderToVirtualPrecompiledCache(); diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp index 9e74eda0d..4bf0d6090 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.cpp +++ b/src/video_core/renderer_opengl/gl_shader_util.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <string_view> #include <vector> #include <glad/glad.h> #include "common/assert.h" @@ -11,7 +12,8 @@ namespace OpenGL::GLShader { namespace { -const char* GetStageDebugName(GLenum type) { + +std::string_view StageDebugName(GLenum type) { switch (type) { case GL_VERTEX_SHADER: return "vertex"; @@ -25,12 +27,17 @@ const char* GetStageDebugName(GLenum type) { UNIMPLEMENTED(); return "unknown"; } + } // Anonymous namespace -GLuint LoadShader(const char* source, GLenum type) { - const char* debug_type = GetStageDebugName(type); +GLuint LoadShader(std::string_view source, GLenum type) { + const std::string_view debug_type = StageDebugName(type); const GLuint shader_id = glCreateShader(type); - glShaderSource(shader_id, 1, &source, nullptr); + + const GLchar* source_string = source.data(); + const GLint source_length = static_cast<GLint>(source.size()); + + glShaderSource(shader_id, 1, &source_string, &source_length); LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); glCompileShader(shader_id); diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 03b7548c2..1b770532e 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -38,7 +38,7 @@ void LogShaderSource(T... shaders) { * @param source String of the GLSL shader program * @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER) */ -GLuint LoadShader(const char* source, GLenum type); +GLuint LoadShader(std::string_view source, GLenum type); /** * Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 0a7bc9e2b..f403f388a 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -403,7 +403,7 @@ void CachedSurface::DecorateSurfaceName() { LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName()); } -void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) { +void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix) { LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index bfc4ddf5d..de8f18489 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -90,7 +90,7 @@ public: Tegra::Texture::SwizzleSource z_source, Tegra::Texture::SwizzleSource w_source); - void DecorateViewName(GPUVAddr gpu_addr, std::string prefix); + void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix); void MarkAsModified(u64 tick) { surface.MarkAsModified(true, tick); diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 52e9e8250..b759c2dba 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -21,6 +21,8 @@ #include "core/perf_stats.h" #include "core/settings.h" #include "core/telemetry_session.h" +#include "video_core/host_shaders/opengl_present_frag.h" +#include "video_core/host_shaders/opengl_present_vert.h" #include "video_core/morton.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" @@ -44,46 +46,6 @@ struct Frame { bool is_srgb{}; /// Framebuffer is sRGB or RGB }; -constexpr char VERTEX_SHADER[] = R"( -#version 430 core - -out gl_PerVertex { - vec4 gl_Position; -}; - -layout (location = 0) in vec2 vert_position; -layout (location = 1) in vec2 vert_tex_coord; -layout (location = 0) out vec2 frag_tex_coord; - -// This is a truncated 3x3 matrix for 2D transformations: -// The upper-left 2x2 submatrix performs scaling/rotation/mirroring. -// The third column performs translation. -// The third row could be used for projection, which we don't need in 2D. It hence is assumed to -// implicitly be [0, 0, 1] -layout (location = 0) uniform mat3x2 modelview_matrix; - -void main() { - // Multiply input position by the rotscale part of the matrix and then manually translate by - // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector - // to `vec3(vert_position.xy, 1.0)` - gl_Position = vec4(mat2(modelview_matrix) * vert_position + modelview_matrix[2], 0.0, 1.0); - frag_tex_coord = vert_tex_coord; -} -)"; - -constexpr char FRAGMENT_SHADER[] = R"( -#version 430 core - -layout (location = 0) in vec2 frag_tex_coord; -layout (location = 0) out vec4 color; - -layout (binding = 0) uniform sampler2D color_texture; - -void main() { - color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f); -} -)"; - constexpr GLint PositionLocation = 0; constexpr GLint TexCoordLocation = 1; constexpr GLint ModelViewMatrixLocation = 0; @@ -313,10 +275,11 @@ public: } }; -RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, - Core::Frontend::GraphicsContext& context) - : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context}, - program_manager{device}, has_debug_tool{HasDebugTool()} {} +RendererOpenGL::RendererOpenGL(Core::System& system_, Core::Frontend::EmuWindow& emu_window_, + Tegra::GPU& gpu_, + std::unique_ptr<Core::Frontend::GraphicsContext> context_) + : RendererBase{emu_window_, std::move(context_)}, system{system_}, + emu_window{emu_window_}, gpu{gpu_}, program_manager{device}, has_debug_tool{HasDebugTool()} {} RendererOpenGL::~RendererOpenGL() = default; @@ -384,7 +347,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { if (has_debug_tool) { glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); Present(0); - context.SwapBuffers(); + context->SwapBuffers(); } } @@ -460,10 +423,10 @@ void RendererOpenGL::InitOpenGLObjects() { // Create shader programs OGLShader vertex_shader; - vertex_shader.Create(VERTEX_SHADER, GL_VERTEX_SHADER); + vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); OGLShader fragment_shader; - fragment_shader.Create(FRAGMENT_SHADER, GL_FRAGMENT_SHADER); + fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); vertex_program.Create(true, false, vertex_shader.handle); fragment_program.Create(true, false, fragment_shader.handle); @@ -509,9 +472,10 @@ void RendererOpenGL::AddTelemetryFields() { LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model); auto& telemetry_session = system.TelemetrySession(); - telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor); - telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model); - telemetry_session.AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version); + constexpr auto user_system = Common::Telemetry::FieldType::UserSystem; + telemetry_session.AddField(user_system, "GPU_Vendor", gpu_vendor); + telemetry_session.AddField(user_system, "GPU_Model", gpu_model); + telemetry_session.AddField(user_system, "GPU_OpenGL_Version", gl_version); } void RendererOpenGL::CreateRasterizer() { diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 8b18d32e6..52ea76b7d 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -56,8 +56,9 @@ class FrameMailbox; class RendererOpenGL final : public VideoCore::RendererBase { public: - explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system, - Core::Frontend::GraphicsContext& context); + explicit RendererOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, + Tegra::GPU& gpu, + std::unique_ptr<Core::Frontend::GraphicsContext> context); ~RendererOpenGL() override; bool Init() override; @@ -93,9 +94,9 @@ private: bool Present(int timeout_ms); - Core::Frontend::EmuWindow& emu_window; Core::System& system; - Core::Frontend::GraphicsContext& context; + Core::Frontend::EmuWindow& emu_window; + Tegra::GPU& gpu; const Device device; StateTracker state_tracker{system}; @@ -120,7 +121,7 @@ private: std::vector<u8> gl_framebuffer_data; /// Used for transforming the framebuffer orientation - Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; + Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags{}; Common::Rectangle<int> framebuffer_crop_rect; /// Frame presentation mailbox diff --git a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp index 435c8c1b8..5b01020ec 100644 --- a/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp +++ b/src/video_core/renderer_vulkan/nsight_aftermath_tracker.cpp @@ -65,10 +65,10 @@ bool NsightAftermathTracker::Initialize() { return false; } - dump_dir = FileUtil::GetUserPath(FileUtil::UserPath::LogDir) + "gpucrash"; + dump_dir = Common::FS::GetUserPath(Common::FS::UserPath::LogDir) + "gpucrash"; - (void)FileUtil::DeleteDirRecursively(dump_dir); - if (!FileUtil::CreateDir(dump_dir)) { + (void)Common::FS::DeleteDirRecursively(dump_dir); + if (!Common::FS::CreateDir(dump_dir)) { LOG_ERROR(Render_Vulkan, "Failed to create Nsight Aftermath dump directory"); return false; } @@ -106,7 +106,7 @@ void NsightAftermathTracker::SaveShader(const std::vector<u32>& spirv) const { return; } - FileUtil::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb"); + Common::FS::IOFile file(fmt::format("{}/source_{:016x}.spv", dump_dir, hash.hash), "wb"); if (!file.IsOpen()) { LOG_ERROR(Render_Vulkan, "Failed to dump SPIR-V module with hash={:016x}", hash.hash); return; @@ -156,12 +156,12 @@ void NsightAftermathTracker::OnGpuCrashDumpCallback(const void* gpu_crash_dump, }(); std::string_view dump_view(static_cast<const char*>(gpu_crash_dump), gpu_crash_dump_size); - if (FileUtil::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) { + if (Common::FS::WriteStringToFile(false, base_name, dump_view) != gpu_crash_dump_size) { LOG_ERROR(Render_Vulkan, "Failed to write dump file"); return; } const std::string_view json_view(json.data(), json.size()); - if (FileUtil::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) { + if (Common::FS::WriteStringToFile(true, base_name + ".json", json_view) != json.size()) { LOG_ERROR(Render_Vulkan, "Failed to write JSON"); return; } @@ -180,7 +180,7 @@ void NsightAftermathTracker::OnShaderDebugInfoCallback(const void* shader_debug_ const std::string path = fmt::format("{}/shader_{:016x}{:016x}.nvdbg", dump_dir, identifier.id[0], identifier.id[1]); - FileUtil::IOFile file(path, "wb"); + Common::FS::IOFile file(path, "wb"); if (!file.IsOpen()) { LOG_ERROR(Render_Vulkan, "Failed to create file {}", path); return; diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 2258479f5..ae46e0444 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -78,7 +78,7 @@ Common::DynamicLibrary OpenVulkanLibrary() { if (!libvulkan_env || !library.Open(libvulkan_env)) { // Use the libvulkan.dylib from the application bundle. const std::string filename = - FileUtil::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib"; + Common::FS::GetBundleDirectory() + "/Contents/Frameworks/libvulkan.dylib"; library.Open(filename.c_str()); } #else @@ -237,8 +237,10 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext } // Anonymous namespace -RendererVulkan::RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system) - : RendererBase(window), system{system} {} +RendererVulkan::RendererVulkan(Core::System& system_, Core::Frontend::EmuWindow& emu_window, + Tegra::GPU& gpu_, + std::unique_ptr<Core::Frontend::GraphicsContext> context) + : RendererBase{emu_window, std::move(context)}, system{system_}, gpu{gpu_} {} RendererVulkan::~RendererVulkan() { ShutDown(); @@ -439,7 +441,7 @@ void RendererVulkan::Report() const { LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version); auto& telemetry_session = system.TelemetrySession(); - constexpr auto field = Telemetry::FieldType::UserSystem; + constexpr auto field = Common::Telemetry::FieldType::UserSystem; telemetry_session.AddField(field, "GPU_Vendor", vendor_name); telemetry_session.AddField(field, "GPU_Model", model_name); telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 522b5bff8..13debbbc0 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -38,7 +38,9 @@ struct VKScreenInfo { class RendererVulkan final : public VideoCore::RendererBase { public: - explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system); + explicit RendererVulkan(Core::System& system, Core::Frontend::EmuWindow& emu_window, + Tegra::GPU& gpu, + std::unique_ptr<Core::Frontend::GraphicsContext> context); ~RendererVulkan() override; bool Init() override; @@ -58,6 +60,7 @@ private: void Report() const; Core::System& system; + Tegra::GPU& gpu; Common::DynamicLibrary library; vk::InstanceDispatch dld; diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 0c03e4d83..ebcfaa0e3 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -382,6 +382,8 @@ bool VKDevice::Create() { graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); + + use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); return true; } diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 529744f2d..26a233db1 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -202,6 +202,11 @@ public: return reported_extensions; } + /// Returns true if the setting for async shader compilation is enabled. + bool UseAsynchronousShaders() const { + return use_asynchronous_shaders; + } + /// Checks if the physical device is suitable. static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface); @@ -252,6 +257,9 @@ private: bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. + // Asynchronous Graphics Pipeline setting + bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline + // Telemetry parameters std::string vendor_name; ///< Device's driver name. std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions. diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index a02be5487..d7f65d435 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp @@ -29,7 +29,7 @@ void InnerFence::Queue() { } ASSERT(!event); - event = device.GetLogical().CreateEvent(); + event = device.GetLogical().CreateNewEvent(); ticks = scheduler.Ticks(); scheduler.RequestOutsideRenderPassOperationContext(); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index aaf930b90..2e46c6278 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -78,15 +78,14 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche const GraphicsPipelineCacheKey& key, vk::Span<VkDescriptorSetLayoutBinding> bindings, const SPIRVProgram& program) - : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()}, + : device{device}, scheduler{scheduler}, cache_key{key}, hash{cache_key.Hash()}, descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, descriptor_allocator{descriptor_pool, *descriptor_set_layout}, update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()}, descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules( program)}, - renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline( - key.renderpass_params, - program)} {} + renderpass{renderpass_cache.GetRenderPass(cache_key.renderpass_params)}, + pipeline{CreatePipeline(cache_key.renderpass_params, program)} {} VKGraphicsPipeline::~VKGraphicsPipeline() = default; @@ -181,7 +180,7 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, const SPIRVProgram& program) const { - const auto& state = fixed_state; + const auto& state = cache_key.fixed_state; const auto& viewport_swizzles = state.viewport_swizzles; FixedPipelineState::DynamicState dynamic; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index a1d699a6c..58aa35efd 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -19,7 +19,27 @@ namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; -struct GraphicsPipelineCacheKey; +struct GraphicsPipelineCacheKey { + RenderPassParams renderpass_params; + u32 padding; + std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; + FixedPipelineState fixed_state; + + std::size_t Hash() const noexcept; + + bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; + + bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { + return !operator==(rhs); + } + + std::size_t Size() const noexcept { + return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); + } +}; +static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); +static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); +static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); class VKDescriptorPool; class VKDevice; @@ -54,6 +74,10 @@ public: return renderpass; } + GraphicsPipelineCacheKey GetCacheKey() const { + return cache_key; + } + private: vk::DescriptorSetLayout CreateDescriptorSetLayout( vk::Span<VkDescriptorSetLayoutBinding> bindings) const; @@ -70,7 +94,7 @@ private: const VKDevice& device; VKScheduler& scheduler; - const FixedPipelineState fixed_state; + const GraphicsPipelineCacheKey cache_key; const u64 hash; vk::DescriptorSetLayout descriptor_set_layout; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 418c62bc4..cfdcdd6ab 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -28,6 +28,7 @@ #include "video_core/shader/compiler_settings.h" #include "video_core/shader/memory_util.h" #include "video_core/shader_cache.h" +#include "video_core/shader_notify.h" namespace Vulkan { @@ -205,24 +206,43 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { return last_shaders = shaders; } -VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) { +VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( + const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) { MICROPROFILE_SCOPE(Vulkan_PipelineCache); if (last_graphics_pipeline && last_graphics_key == key) { - return *last_graphics_pipeline; + return last_graphics_pipeline; } last_graphics_key = key; + if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(system.GPU())) { + std::unique_lock lock{pipeline_cache}; + const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); + if (is_cache_miss) { + system.GPU().ShaderNotify().MarkSharderBuilding(); + LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); + const auto [program, bindings] = DecompileShaders(key.fixed_state); + async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, + update_descriptor_queue, renderpass_cache, bindings, + program, key); + } + last_graphics_pipeline = pair->second.get(); + return last_graphics_pipeline; + } + const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); auto& entry = pair->second; if (is_cache_miss) { + system.GPU().ShaderNotify().MarkSharderBuilding(); LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - const auto [program, bindings] = DecompileShaders(key); + const auto [program, bindings] = DecompileShaders(key.fixed_state); entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, update_descriptor_queue, renderpass_cache, key, bindings, program); + system.GPU().ShaderNotify().MarkShaderComplete(); } - return *(last_graphics_pipeline = entry.get()); + last_graphics_pipeline = entry.get(); + return last_graphics_pipeline; } VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { @@ -277,6 +297,12 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach return *entry; } +void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) { + system.GPU().ShaderNotify().MarkShaderComplete(); + std::unique_lock lock{pipeline_cache}; + graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); +} + void VKPipelineCache::OnShaderRemoval(Shader* shader) { bool finished = false; const auto Finish = [&] { @@ -312,8 +338,7 @@ void VKPipelineCache::OnShaderRemoval(Shader* shader) { } std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> -VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { - const auto& fixed_state = key.fixed_state; +VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { auto& memory_manager = system.GPU().MemoryManager(); const auto& gpu = system.GPU().Maxwell3D(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 0a3fe65fb..c04829e77 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -22,6 +22,7 @@ #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/shader/async_shaders.h" #include "video_core/shader/memory_util.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" @@ -43,28 +44,6 @@ class VKUpdateDescriptorQueue; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -struct GraphicsPipelineCacheKey { - RenderPassParams renderpass_params; - u32 padding; - std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; - FixedPipelineState fixed_state; - - std::size_t Hash() const noexcept; - - bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; - - bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { - return !operator==(rhs); - } - - std::size_t Size() const noexcept { - return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); - } -}; -static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); -static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); -static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); - struct ComputePipelineCacheKey { GPUVAddr shader; u32 shared_memory_size; @@ -152,16 +131,19 @@ public: std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); - VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key); + VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, + VideoCommon::Shader::AsyncShaders& async_shaders); VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); + void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline); + protected: void OnShaderRemoval(Shader* shader) final; private: std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( - const GraphicsPipelineCacheKey& key); + const FixedPipelineState& fixed_state); Core::System& system; const VKDevice& device; @@ -178,6 +160,7 @@ private: GraphicsPipelineCacheKey last_graphics_key; VKGraphicsPipeline* last_graphics_pipeline = nullptr; + std::mutex pipeline_cache; std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>> graphics_cache; std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 7500e8244..ff1b52eab 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -14,6 +14,7 @@ #include "common/assert.h" #include "common/logging/log.h" #include "common/microprofile.h" +#include "common/scope_exit.h" #include "core/core.h" #include "core/settings.h" #include "video_core/engines/kepler_compute.h" @@ -400,8 +401,12 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), sampler_cache(device), fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache), - query_cache(system, *this, device, scheduler), wfi_event{device.GetLogical().CreateEvent()} { + query_cache(system, *this, device, scheduler), + wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} { scheduler.SetQueryCache(query_cache); + if (device.UseAsynchronousShaders()) { + async_shaders.AllocateWorkers(); + } } RasterizerVulkan::~RasterizerVulkan() = default; @@ -413,6 +418,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { query_cache.UpdateCounters(); + SCOPE_EXIT({ system.GPU().TickWork(); }); + const auto& gpu = system.GPU().Maxwell3D(); GraphicsPipelineCacheKey key; key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported()); @@ -439,10 +446,15 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { key.renderpass_params = GetRenderPassParams(texceptions); key.padding = 0; - auto& pipeline = pipeline_cache.GetGraphicsPipeline(key); - scheduler.BindGraphicsPipeline(pipeline.GetHandle()); + auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders); + if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { + // Async graphics pipeline was not ready. + return; + } + + scheduler.BindGraphicsPipeline(pipeline->GetHandle()); - const auto renderpass = pipeline.GetRenderPass(); + const auto renderpass = pipeline->GetRenderPass(); const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); scheduler.RequestRenderpass(renderpass, framebuffer, render_area); @@ -452,8 +464,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { BeginTransformFeedback(); - const auto pipeline_layout = pipeline.GetLayout(); - const auto descriptor_set = pipeline.CommitDescriptorSet(); + const auto pipeline_layout = pipeline->GetLayout(); + const auto descriptor_set = pipeline->CommitDescriptorSet(); scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { if (descriptor_set) { cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, @@ -463,8 +475,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { }); EndTransformFeedback(); - - system.GPU().TickWork(); } void RasterizerVulkan::Clear() { @@ -1433,10 +1443,10 @@ void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) { } void RasterizerVulkan::UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs) { - if (!state_tracker.TouchPrimitiveTopology()) { + const Maxwell::PrimitiveTopology primitive_topology = regs.draw.topology.Value(); + if (!state_tracker.ChangePrimitiveTopology(primitive_topology)) { return; } - const Maxwell::PrimitiveTopology primitive_topology = regs.draw.topology.Value(); scheduler.Record([this, primitive_topology](vk::CommandBuffer cmdbuf) { cmdbuf.SetPrimitiveTopologyEXT(MaxwellToVK::PrimitiveTopology(device, primitive_topology)); }); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 923178b0b..f640ba649 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -32,6 +32,7 @@ #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/shader/async_shaders.h" namespace Core { class System; @@ -136,6 +137,14 @@ public: u32 pixel_stride) override; void SetupDirtyFlags() override; + VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { + return async_shaders; + } + + const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { + return async_shaders; + } + /// Maximum supported size that a constbuffer can have in bytes. static constexpr std::size_t MaxConstbufferSize = 0x10000; static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, @@ -297,6 +306,7 @@ private: vk::Buffer default_buffer; VKMemoryCommit default_buffer_commit; vk::Event wfi_event; + VideoCommon::Shader::AsyncShaders async_shaders; std::array<View, Maxwell::NumRenderTargets> color_attachments; View zeta_attachment; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index 9151d9fb1..4bd1009f9 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -42,7 +42,6 @@ Flags MakeInvalidationFlags() { flags[DepthWriteEnable] = true; flags[DepthCompareOp] = true; flags[FrontFace] = true; - flags[PrimitiveTopology] = true; flags[StencilOp] = true; flags[StencilTestEnable] = true; return flags; @@ -112,10 +111,6 @@ void SetupDirtyFrontFace(Tables& tables) { table[OFF(screen_y_control)] = FrontFace; } -void SetupDirtyPrimitiveTopology(Tables& tables) { - tables[0][OFF(draw.topology)] = PrimitiveTopology; -} - void SetupDirtyStencilOp(Tables& tables) { auto& table = tables[0]; table[OFF(stencil_front_op_fail)] = StencilOp; @@ -156,13 +151,13 @@ void StateTracker::Initialize() { SetupDirtyDepthWriteEnable(tables); SetupDirtyDepthCompareOp(tables); SetupDirtyFrontFace(tables); - SetupDirtyPrimitiveTopology(tables); SetupDirtyStencilOp(tables); SetupDirtyStencilTestEnable(tables); } void StateTracker::InvalidateCommandBufferState() { system.GPU().Maxwell3D().dirty.flags |= invalidation_flags; + current_topology = INVALID_TOPOLOGY; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index 54ca0d6c6..13a6ce786 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -32,7 +32,6 @@ enum : u8 { DepthWriteEnable, DepthCompareOp, FrontFace, - PrimitiveTopology, StencilOp, StencilTestEnable, @@ -43,6 +42,8 @@ static_assert(Last <= std::numeric_limits<u8>::max()); } // namespace Dirty class StateTracker { + using Maxwell = Tegra::Engines::Maxwell3D::Regs; + public: explicit StateTracker(Core::System& system); @@ -102,10 +103,6 @@ public: return Exchange(Dirty::FrontFace, false); } - bool TouchPrimitiveTopology() { - return Exchange(Dirty::PrimitiveTopology, false); - } - bool TouchStencilOp() { return Exchange(Dirty::StencilOp, false); } @@ -114,7 +111,15 @@ public: return Exchange(Dirty::StencilTestEnable, false); } + bool ChangePrimitiveTopology(Maxwell::PrimitiveTopology new_topology) { + const bool has_changed = current_topology != new_topology; + current_topology = new_topology; + return has_changed; + } + private: + static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u); + bool Exchange(std::size_t id, bool new_value) const noexcept { auto& flags = system.GPU().Maxwell3D().dirty.flags; const bool is_dirty = flags[id]; @@ -124,6 +129,7 @@ private: Core::System& system; Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags; + Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 14cac38ea..013865aa4 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -644,7 +644,7 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons return ShaderModule(object, handle, *dld); } -Event Device::CreateEvent() const { +Event Device::CreateNewEvent() const { static constexpr VkEventCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, .pNext = nullptr, @@ -786,7 +786,7 @@ std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProp VK_SUCCESS) { return std::nullopt; } - return properties; + return std::move(properties); } std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties( diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index 31885ef42..b9d3fedc1 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -721,7 +721,7 @@ public: ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const; - Event CreateEvent() const; + Event CreateNewEvent() const; SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const; diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index b7f66d7ee..f815584f7 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <chrono> #include <condition_variable> #include <mutex> #include <thread> @@ -20,9 +19,18 @@ AsyncShaders::~AsyncShaders() { KillWorkers(); } -void AsyncShaders::AllocateWorkers(std::size_t num_workers) { - // If we're already have workers queued or don't want to queue workers, ignore - if (num_workers == worker_threads.size() || num_workers == 0) { +void AsyncShaders::AllocateWorkers() { + // Max worker threads we should allow + constexpr u32 MAX_THREADS = 4; + // Deduce how many threads we can use + const u32 threads_used = std::thread::hardware_concurrency() / 4; + // Always allow at least 1 thread regardless of our settings + const auto max_worker_count = std::max(1U, threads_used); + // Don't use more than MAX_THREADS + const auto num_workers = std::min(max_worker_count, MAX_THREADS); + + // If we already have workers queued, ignore + if (num_workers == worker_threads.size()) { return; } @@ -34,8 +42,8 @@ void AsyncShaders::AllocateWorkers(std::size_t num_workers) { // Create workers for (std::size_t i = 0; i < num_workers; i++) { context_list.push_back(emu_window.CreateSharedContext()); - worker_threads.push_back(std::move( - std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get()))); + worker_threads.push_back( + std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get())); } } @@ -111,24 +119,50 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, VideoCommon::Shader::CompilerSettings compiler_settings, const VideoCommon::Shader::Registry& registry, VAddr cpu_addr) { - WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM - : AsyncShaders::Backend::OpenGL, - device, - shader_type, - uid, - std::move(code), - std::move(code_b), - main_offset, - compiler_settings, - registry, - cpu_addr}; + WorkerParams params{ + .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL, + .device = &device, + .shader_type = shader_type, + .uid = uid, + .code = std::move(code), + .code_b = std::move(code_b), + .main_offset = main_offset, + .compiler_settings = compiler_settings, + .registry = registry, + .cpu_address = cpu_addr, + }; std::unique_lock lock(queue_mutex); - pending_queue.push_back(std::move(params)); + pending_queue.push(std::move(params)); + cv.notify_one(); +} + +void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, + const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler, + Vulkan::VKDescriptorPool& descriptor_pool, + Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, + Vulkan::VKRenderPassCache& renderpass_cache, + std::vector<VkDescriptorSetLayoutBinding> bindings, + Vulkan::SPIRVProgram program, + Vulkan::GraphicsPipelineCacheKey key) { + WorkerParams params{ + .backend = Backend::Vulkan, + .pp_cache = pp_cache, + .vk_device = &device, + .scheduler = &scheduler, + .descriptor_pool = &descriptor_pool, + .update_descriptor_queue = &update_descriptor_queue, + .renderpass_cache = &renderpass_cache, + .bindings = bindings, + .program = program, + .key = key, + }; + + std::unique_lock lock(queue_mutex); + pending_queue.push(std::move(params)); cv.notify_one(); } void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { - using namespace std::chrono_literals; while (!is_thread_exiting.load(std::memory_order_relaxed)) { std::unique_lock lock{queue_mutex}; cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); @@ -144,18 +178,17 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context if (pending_queue.empty()) { continue; } + // Pull work from queue WorkerParams work = std::move(pending_queue.front()); - pending_queue.pop_front(); - + pending_queue.pop(); lock.unlock(); - if (work.backend == AsyncShaders::Backend::OpenGL || - work.backend == AsyncShaders::Backend::GLASM) { - const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry); + if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) { + const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry); const auto scope = context->Acquire(); auto program = - OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry); + OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry); Result result{}; result.backend = work.backend; result.cpu_address = work.cpu_address; @@ -164,9 +197,9 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context result.code_b = std::move(work.code_b); result.shader_type = work.shader_type; - if (work.backend == AsyncShaders::Backend::OpenGL) { + if (work.backend == Backend::OpenGL) { result.program.opengl = std::move(program->source_program); - } else if (work.backend == AsyncShaders::Backend::GLASM) { + } else if (work.backend == Backend::GLASM) { result.program.glasm = std::move(program->assembly_program); } @@ -174,6 +207,13 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context std::unique_lock complete_lock(completed_mutex); finished_work.push_back(std::move(result)); } + } else if (work.backend == Backend::Vulkan) { + auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>( + *work.vk_device, *work.scheduler, *work.descriptor_pool, + *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings, + work.program); + + work.pp_cache->EmplacePipeline(std::move(pipeline)); } } } diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h index 2f5ee94ad..d5ae814d5 100644 --- a/src/video_core/shader/async_shaders.h +++ b/src/video_core/shader/async_shaders.h @@ -14,6 +14,10 @@ #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" namespace Core::Frontend { class EmuWindow; @@ -24,6 +28,10 @@ namespace Tegra { class GPU; } +namespace Vulkan { +class VKPipelineCache; +} + namespace VideoCommon::Shader { class AsyncShaders { @@ -31,6 +39,7 @@ public: enum class Backend { OpenGL, GLASM, + Vulkan, }; struct ResultPrograms { @@ -52,7 +61,7 @@ public: ~AsyncShaders(); /// Start up shader worker threads - void AllocateWorkers(std::size_t num_workers); + void AllocateWorkers(); /// Clear the shader queue and kill all worker threads void FreeWorkers(); @@ -76,6 +85,14 @@ public: VideoCommon::Shader::CompilerSettings compiler_settings, const VideoCommon::Shader::Registry& registry, VAddr cpu_addr); + void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device, + Vulkan::VKScheduler& scheduler, + Vulkan::VKDescriptorPool& descriptor_pool, + Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, + Vulkan::VKRenderPassCache& renderpass_cache, + std::vector<VkDescriptorSetLayoutBinding> bindings, + Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key); + private: void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); @@ -83,16 +100,28 @@ private: bool HasWorkQueued(); struct WorkerParams { - AsyncShaders::Backend backend; - OpenGL::Device device; + Backend backend; + // For OGL + const OpenGL::Device* device; Tegra::Engines::ShaderType shader_type; u64 uid; std::vector<u64> code; std::vector<u64> code_b; u32 main_offset; VideoCommon::Shader::CompilerSettings compiler_settings; - VideoCommon::Shader::Registry registry; + std::optional<VideoCommon::Shader::Registry> registry; VAddr cpu_address; + + // For Vulkan + Vulkan::VKPipelineCache* pp_cache; + const Vulkan::VKDevice* vk_device; + Vulkan::VKScheduler* scheduler; + Vulkan::VKDescriptorPool* descriptor_pool; + Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; + Vulkan::VKRenderPassCache* renderpass_cache; + std::vector<VkDescriptorSetLayoutBinding> bindings; + Vulkan::SPIRVProgram program; + Vulkan::GraphicsPipelineCacheKey key; }; std::condition_variable cv; @@ -101,7 +130,7 @@ private: std::atomic<bool> is_thread_exiting{}; std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; std::vector<std::thread> worker_threads; - std::deque<WorkerParams> pending_queue; + std::queue<WorkerParams> pending_queue; std::vector<AsyncShaders::Result> finished_work; Core::Frontend::EmuWindow& emu_window; }; diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index 474ae620a..16d46a018 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -228,24 +228,30 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 } } -void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, - u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, - u32 block_height_bit, u32 offset_x, u32 offset_y) { - const u32 block_height = 1U << block_height_bit; - for (u32 line = 0; line < subrect_height; ++line) { - const u32 y2 = line + offset_y; - const u32 gob_address_y = (y2 / (GOB_SIZE_Y * block_height)) * GOB_SIZE * block_height + - ((y2 % (GOB_SIZE_Y * block_height)) / GOB_SIZE_Y) * GOB_SIZE; - const auto& table = LEGACY_SWIZZLE_TABLE[y2 % GOB_SIZE_Y]; - for (u32 x = 0; x < subrect_width; ++x) { - const u32 x2 = (x + offset_x) * bytes_per_pixel; - const u32 gob_address = gob_address_y + (x2 / GOB_SIZE_X) * GOB_SIZE * block_height; - const u32 swizzled_offset = gob_address + table[x2 % GOB_SIZE_X]; - const u32 unswizzled_offset = line * dest_pitch + x * bytes_per_pixel; - u8* dest_line = unswizzled_data + unswizzled_offset; - u8* source_addr = swizzled_data + swizzled_offset; +void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel, + u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input) { + const u32 stride = width * bytes_per_pixel; + const u32 gobs_in_x = (stride + GOB_SIZE_X - 1) / GOB_SIZE_X; + const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height); + + const u32 block_height_mask = (1U << block_height) - 1; + const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height; + + for (u32 line = 0; line < line_count; ++line) { + const u32 src_y = line + origin_y; + const auto& table = LEGACY_SWIZZLE_TABLE[src_y % GOB_SIZE_Y]; + + const u32 block_y = src_y >> GOB_SIZE_Y_SHIFT; + const u32 src_offset_y = (block_y >> block_height) * block_size + + ((block_y & block_height_mask) << GOB_SIZE_SHIFT); + for (u32 column = 0; column < line_length_in; ++column) { + const u32 src_x = (column + origin_x) * bytes_per_pixel; + const u32 src_offset_x = (src_x >> GOB_SIZE_X_SHIFT) << x_shift; + + const u32 swizzled_offset = src_offset_y + src_offset_x + table[src_x % GOB_SIZE_X]; + const u32 unswizzled_offset = line * pitch + column * bytes_per_pixel; - std::memcpy(dest_line, source_addr, bytes_per_pixel); + std::memcpy(output + unswizzled_offset, input + swizzled_offset, bytes_per_pixel); } } } @@ -261,7 +267,7 @@ void SwizzleSliceToVoxel(u32 line_length_in, u32 line_count, u32 pitch, u32 widt const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); const u32 block_height_mask = (1U << block_height) - 1; - const u32 x_shift = Common::CountTrailingZeroes32(GOB_SIZE << (block_height + block_depth)); + const u32 x_shift = static_cast<u32>(GOB_SIZE_SHIFT) + block_height + block_depth; for (u32 line = 0; line < line_count; ++line) { const auto& table = LEGACY_SWIZZLE_TABLE[line % GOB_SIZE_Y]; diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h index d6fe35d37..01e156bc8 100644 --- a/src/video_core/textures/decoders.h +++ b/src/video_core/textures/decoders.h @@ -48,9 +48,8 @@ void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 u32 block_height_bit, u32 offset_x, u32 offset_y); /// Copies a tiled subrectangle into a linear surface. -void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width, - u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, - u32 offset_x, u32 offset_y); +void UnswizzleSubrect(u32 line_length_in, u32 line_count, u32 pitch, u32 width, u32 bytes_per_pixel, + u32 block_height, u32 origin_x, u32 origin_y, u8* output, const u8* input); /// @brief Swizzles a 2D array of pixels into a 3D texture /// @param line_length_in Number of pixels per line diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 45f360bdd..4e3a092c7 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <memory> + #include "common/logging/log.h" #include "core/core.h" #include "core/settings.h" @@ -16,37 +17,46 @@ #include "video_core/video_core.h" namespace { -std::unique_ptr<VideoCore::RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window, - Core::System& system, - Core::Frontend::GraphicsContext& context) { + +std::unique_ptr<VideoCore::RendererBase> CreateRenderer( + Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu, + std::unique_ptr<Core::Frontend::GraphicsContext> context) { switch (Settings::values.renderer_backend.GetValue()) { case Settings::RendererBackend::OpenGL: - return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system, context); + return std::make_unique<OpenGL::RendererOpenGL>(system, emu_window, gpu, + std::move(context)); #ifdef HAS_VULKAN case Settings::RendererBackend::Vulkan: - return std::make_unique<Vulkan::RendererVulkan>(emu_window, system); + return std::make_unique<Vulkan::RendererVulkan>(system, emu_window, gpu, + std::move(context)); #endif default: return nullptr; } } + } // Anonymous namespace namespace VideoCore { std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { + std::unique_ptr<Tegra::GPU> gpu; + if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { + gpu = std::make_unique<VideoCommon::GPUAsynch>(system); + } else { + gpu = std::make_unique<VideoCommon::GPUSynch>(system); + } + auto context = emu_window.CreateSharedContext(); const auto scope = context->Acquire(); - auto renderer = CreateRenderer(emu_window, system, *context); + + auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context)); if (!renderer->Init()) { return nullptr; } - if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) { - return std::make_unique<VideoCommon::GPUAsynch>(system, std::move(renderer), - std::move(context)); - } - return std::make_unique<VideoCommon::GPUSynch>(system, std::move(renderer), std::move(context)); + gpu->BindRenderer(std::move(renderer)); + return gpu; } u16 GetResolutionScaleFactor(const RendererBase& renderer) { |