diff options
Diffstat (limited to 'src')
24 files changed, 268 insertions, 121 deletions
diff --git a/src/core/crypto/aes_util.cpp b/src/core/crypto/aes_util.cpp index 330996b24..6a9734812 100644 --- a/src/core/crypto/aes_util.cpp +++ b/src/core/crypto/aes_util.cpp @@ -116,7 +116,7 @@ void AESCipher<Key, KeySize>::XTSTranscode(const u8* src, std::size_t size, u8* for (std::size_t i = 0; i < size; i += sector_size) { SetIV(CalculateNintendoTweak(sector_id++)); - Transcode<u8, u8>(src + i, sector_size, dest + i, op); + Transcode(src + i, sector_size, dest + i, op); } } diff --git a/src/core/crypto/partition_data_manager.cpp b/src/core/crypto/partition_data_manager.cpp index 3e96f7516..46136d04a 100644 --- a/src/core/crypto/partition_data_manager.cpp +++ b/src/core/crypto/partition_data_manager.cpp @@ -367,8 +367,8 @@ static bool AttemptDecrypt(const std::array<u8, 16>& key, Package2Header& header Package2Header temp = header; AESCipher<Key128> cipher(key, Mode::CTR); cipher.SetIV(header.header_ctr); - cipher.Transcode(&temp.header_ctr, sizeof(Package2Header) - 0x100, &temp.header_ctr, - Op::Decrypt); + cipher.Transcode(&temp.header_ctr, sizeof(Package2Header) - sizeof(Package2Header::signature), + &temp.header_ctr, Op::Decrypt); if (temp.magic == Common::MakeMagic('P', 'K', '2', '1')) { header = temp; return true; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index cb284db77..4af5824cd 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -177,15 +177,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind } if (device.UseAsynchronousShaders()) { - // Max worker threads we should allow - constexpr u32 MAX_THREADS = 4; - // Deduce how many threads we can use - const u32 threads_used = std::thread::hardware_concurrency() / 4; - // Always allow at least 1 thread regardless of our settings - const auto max_worker_count = std::max(1U, threads_used); - // Don't use more than MAX_THREADS - const auto worker_count = std::min(max_worker_count, MAX_THREADS); - async_shaders.AllocateWorkers(worker_count); + async_shaders.AllocateWorkers(); } } diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 0c03e4d83..ebcfaa0e3 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -382,6 +382,8 @@ bool VKDevice::Create() { graphics_queue = logical.GetQueue(graphics_family); present_queue = logical.GetQueue(present_family); + + use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue(); return true; } diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index 529744f2d..26a233db1 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -202,6 +202,11 @@ public: return reported_extensions; } + /// Returns true if the setting for async shader compilation is enabled. + bool UseAsynchronousShaders() const { + return use_asynchronous_shaders; + } + /// Checks if the physical device is suitable. static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface); @@ -252,6 +257,9 @@ private: bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. + // Asynchronous Graphics Pipeline setting + bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline + // Telemetry parameters std::string vendor_name; ///< Device's driver name. std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions. diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp index a02be5487..d7f65d435 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp @@ -29,7 +29,7 @@ void InnerFence::Queue() { } ASSERT(!event); - event = device.GetLogical().CreateEvent(); + event = device.GetLogical().CreateNewEvent(); ticks = scheduler.Ticks(); scheduler.RequestOutsideRenderPassOperationContext(); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index aaf930b90..2e46c6278 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -78,15 +78,14 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche const GraphicsPipelineCacheKey& key, vk::Span<VkDescriptorSetLayoutBinding> bindings, const SPIRVProgram& program) - : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()}, + : device{device}, scheduler{scheduler}, cache_key{key}, hash{cache_key.Hash()}, descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, descriptor_allocator{descriptor_pool, *descriptor_set_layout}, update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()}, descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules( program)}, - renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline( - key.renderpass_params, - program)} {} + renderpass{renderpass_cache.GetRenderPass(cache_key.renderpass_params)}, + pipeline{CreatePipeline(cache_key.renderpass_params, program)} {} VKGraphicsPipeline::~VKGraphicsPipeline() = default; @@ -181,7 +180,7 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules( vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, const SPIRVProgram& program) const { - const auto& state = fixed_state; + const auto& state = cache_key.fixed_state; const auto& viewport_swizzles = state.viewport_swizzles; FixedPipelineState::DynamicState dynamic; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index a1d699a6c..58aa35efd 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -19,7 +19,27 @@ namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; -struct GraphicsPipelineCacheKey; +struct GraphicsPipelineCacheKey { + RenderPassParams renderpass_params; + u32 padding; + std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; + FixedPipelineState fixed_state; + + std::size_t Hash() const noexcept; + + bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; + + bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { + return !operator==(rhs); + } + + std::size_t Size() const noexcept { + return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); + } +}; +static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); +static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); +static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); class VKDescriptorPool; class VKDevice; @@ -54,6 +74,10 @@ public: return renderpass; } + GraphicsPipelineCacheKey GetCacheKey() const { + return cache_key; + } + private: vk::DescriptorSetLayout CreateDescriptorSetLayout( vk::Span<VkDescriptorSetLayoutBinding> bindings) const; @@ -70,7 +94,7 @@ private: const VKDevice& device; VKScheduler& scheduler; - const FixedPipelineState fixed_state; + const GraphicsPipelineCacheKey cache_key; const u64 hash; vk::DescriptorSetLayout descriptor_set_layout; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 418c62bc4..cfdcdd6ab 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -28,6 +28,7 @@ #include "video_core/shader/compiler_settings.h" #include "video_core/shader/memory_util.h" #include "video_core/shader_cache.h" +#include "video_core/shader_notify.h" namespace Vulkan { @@ -205,24 +206,43 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() { return last_shaders = shaders; } -VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(const GraphicsPipelineCacheKey& key) { +VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline( + const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) { MICROPROFILE_SCOPE(Vulkan_PipelineCache); if (last_graphics_pipeline && last_graphics_key == key) { - return *last_graphics_pipeline; + return last_graphics_pipeline; } last_graphics_key = key; + if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(system.GPU())) { + std::unique_lock lock{pipeline_cache}; + const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); + if (is_cache_miss) { + system.GPU().ShaderNotify().MarkSharderBuilding(); + LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); + const auto [program, bindings] = DecompileShaders(key.fixed_state); + async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool, + update_descriptor_queue, renderpass_cache, bindings, + program, key); + } + last_graphics_pipeline = pair->second.get(); + return last_graphics_pipeline; + } + const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); auto& entry = pair->second; if (is_cache_miss) { + system.GPU().ShaderNotify().MarkSharderBuilding(); LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash()); - const auto [program, bindings] = DecompileShaders(key); + const auto [program, bindings] = DecompileShaders(key.fixed_state); entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool, update_descriptor_queue, renderpass_cache, key, bindings, program); + system.GPU().ShaderNotify().MarkShaderComplete(); } - return *(last_graphics_pipeline = entry.get()); + last_graphics_pipeline = entry.get(); + return last_graphics_pipeline; } VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { @@ -277,6 +297,12 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach return *entry; } +void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) { + system.GPU().ShaderNotify().MarkShaderComplete(); + std::unique_lock lock{pipeline_cache}; + graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline); +} + void VKPipelineCache::OnShaderRemoval(Shader* shader) { bool finished = false; const auto Finish = [&] { @@ -312,8 +338,7 @@ void VKPipelineCache::OnShaderRemoval(Shader* shader) { } std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> -VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { - const auto& fixed_state = key.fixed_state; +VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) { auto& memory_manager = system.GPU().MemoryManager(); const auto& gpu = system.GPU().Maxwell3D(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 0a3fe65fb..c04829e77 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -22,6 +22,7 @@ #include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/shader/async_shaders.h" #include "video_core/shader/memory_util.h" #include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" @@ -43,28 +44,6 @@ class VKUpdateDescriptorQueue; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -struct GraphicsPipelineCacheKey { - RenderPassParams renderpass_params; - u32 padding; - std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders; - FixedPipelineState fixed_state; - - std::size_t Hash() const noexcept; - - bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; - - bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { - return !operator==(rhs); - } - - std::size_t Size() const noexcept { - return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size(); - } -}; -static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>); -static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>); -static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>); - struct ComputePipelineCacheKey { GPUVAddr shader; u32 shared_memory_size; @@ -152,16 +131,19 @@ public: std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); - VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key); + VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, + VideoCommon::Shader::AsyncShaders& async_shaders); VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); + void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline); + protected: void OnShaderRemoval(Shader* shader) final; private: std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( - const GraphicsPipelineCacheKey& key); + const FixedPipelineState& fixed_state); Core::System& system; const VKDevice& device; @@ -178,6 +160,7 @@ private: GraphicsPipelineCacheKey last_graphics_key; VKGraphicsPipeline* last_graphics_pipeline = nullptr; + std::mutex pipeline_cache; std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>> graphics_cache; std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 7500e8244..936f76195 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -14,6 +14,7 @@ #include "common/assert.h" #include "common/logging/log.h" #include "common/microprofile.h" +#include "common/scope_exit.h" #include "core/core.h" #include "core/settings.h" #include "video_core/engines/kepler_compute.h" @@ -400,8 +401,12 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), sampler_cache(device), fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache), - query_cache(system, *this, device, scheduler), wfi_event{device.GetLogical().CreateEvent()} { + query_cache(system, *this, device, scheduler), + wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} { scheduler.SetQueryCache(query_cache); + if (device.UseAsynchronousShaders()) { + async_shaders.AllocateWorkers(); + } } RasterizerVulkan::~RasterizerVulkan() = default; @@ -413,6 +418,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { query_cache.UpdateCounters(); + SCOPE_EXIT({ system.GPU().TickWork(); }); + const auto& gpu = system.GPU().Maxwell3D(); GraphicsPipelineCacheKey key; key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported()); @@ -439,10 +446,15 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { key.renderpass_params = GetRenderPassParams(texceptions); key.padding = 0; - auto& pipeline = pipeline_cache.GetGraphicsPipeline(key); - scheduler.BindGraphicsPipeline(pipeline.GetHandle()); + auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders); + if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { + // Async graphics pipeline was not ready. + return; + } + + scheduler.BindGraphicsPipeline(pipeline->GetHandle()); - const auto renderpass = pipeline.GetRenderPass(); + const auto renderpass = pipeline->GetRenderPass(); const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); scheduler.RequestRenderpass(renderpass, framebuffer, render_area); @@ -452,8 +464,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { BeginTransformFeedback(); - const auto pipeline_layout = pipeline.GetLayout(); - const auto descriptor_set = pipeline.CommitDescriptorSet(); + const auto pipeline_layout = pipeline->GetLayout(); + const auto descriptor_set = pipeline->CommitDescriptorSet(); scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { if (descriptor_set) { cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, @@ -463,8 +475,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { }); EndTransformFeedback(); - - system.GPU().TickWork(); } void RasterizerVulkan::Clear() { diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 923178b0b..f640ba649 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -32,6 +32,7 @@ #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/renderer_vulkan/wrapper.h" +#include "video_core/shader/async_shaders.h" namespace Core { class System; @@ -136,6 +137,14 @@ public: u32 pixel_stride) override; void SetupDirtyFlags() override; + VideoCommon::Shader::AsyncShaders& GetAsyncShaders() { + return async_shaders; + } + + const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const { + return async_shaders; + } + /// Maximum supported size that a constbuffer can have in bytes. static constexpr std::size_t MaxConstbufferSize = 0x10000; static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, @@ -297,6 +306,7 @@ private: vk::Buffer default_buffer; VKMemoryCommit default_buffer_commit; vk::Event wfi_event; + VideoCommon::Shader::AsyncShaders async_shaders; std::array<View, Maxwell::NumRenderTargets> color_attachments; View zeta_attachment; diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 14cac38ea..c43d60adf 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -644,7 +644,7 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons return ShaderModule(object, handle, *dld); } -Event Device::CreateEvent() const { +Event Device::CreateNewEvent() const { static constexpr VkEventCreateInfo ci{ .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, .pNext = nullptr, diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index 31885ef42..b9d3fedc1 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -721,7 +721,7 @@ public: ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const; - Event CreateEvent() const; + Event CreateNewEvent() const; SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const; diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp index b7f66d7ee..f815584f7 100644 --- a/src/video_core/shader/async_shaders.cpp +++ b/src/video_core/shader/async_shaders.cpp @@ -2,7 +2,6 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <chrono> #include <condition_variable> #include <mutex> #include <thread> @@ -20,9 +19,18 @@ AsyncShaders::~AsyncShaders() { KillWorkers(); } -void AsyncShaders::AllocateWorkers(std::size_t num_workers) { - // If we're already have workers queued or don't want to queue workers, ignore - if (num_workers == worker_threads.size() || num_workers == 0) { +void AsyncShaders::AllocateWorkers() { + // Max worker threads we should allow + constexpr u32 MAX_THREADS = 4; + // Deduce how many threads we can use + const u32 threads_used = std::thread::hardware_concurrency() / 4; + // Always allow at least 1 thread regardless of our settings + const auto max_worker_count = std::max(1U, threads_used); + // Don't use more than MAX_THREADS + const auto num_workers = std::min(max_worker_count, MAX_THREADS); + + // If we already have workers queued, ignore + if (num_workers == worker_threads.size()) { return; } @@ -34,8 +42,8 @@ void AsyncShaders::AllocateWorkers(std::size_t num_workers) { // Create workers for (std::size_t i = 0; i < num_workers; i++) { context_list.push_back(emu_window.CreateSharedContext()); - worker_threads.push_back(std::move( - std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get()))); + worker_threads.push_back( + std::thread(&AsyncShaders::ShaderCompilerThread, this, context_list[i].get())); } } @@ -111,24 +119,50 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device, VideoCommon::Shader::CompilerSettings compiler_settings, const VideoCommon::Shader::Registry& registry, VAddr cpu_addr) { - WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM - : AsyncShaders::Backend::OpenGL, - device, - shader_type, - uid, - std::move(code), - std::move(code_b), - main_offset, - compiler_settings, - registry, - cpu_addr}; + WorkerParams params{ + .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL, + .device = &device, + .shader_type = shader_type, + .uid = uid, + .code = std::move(code), + .code_b = std::move(code_b), + .main_offset = main_offset, + .compiler_settings = compiler_settings, + .registry = registry, + .cpu_address = cpu_addr, + }; std::unique_lock lock(queue_mutex); - pending_queue.push_back(std::move(params)); + pending_queue.push(std::move(params)); + cv.notify_one(); +} + +void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, + const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler, + Vulkan::VKDescriptorPool& descriptor_pool, + Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, + Vulkan::VKRenderPassCache& renderpass_cache, + std::vector<VkDescriptorSetLayoutBinding> bindings, + Vulkan::SPIRVProgram program, + Vulkan::GraphicsPipelineCacheKey key) { + WorkerParams params{ + .backend = Backend::Vulkan, + .pp_cache = pp_cache, + .vk_device = &device, + .scheduler = &scheduler, + .descriptor_pool = &descriptor_pool, + .update_descriptor_queue = &update_descriptor_queue, + .renderpass_cache = &renderpass_cache, + .bindings = bindings, + .program = program, + .key = key, + }; + + std::unique_lock lock(queue_mutex); + pending_queue.push(std::move(params)); cv.notify_one(); } void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { - using namespace std::chrono_literals; while (!is_thread_exiting.load(std::memory_order_relaxed)) { std::unique_lock lock{queue_mutex}; cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); @@ -144,18 +178,17 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context if (pending_queue.empty()) { continue; } + // Pull work from queue WorkerParams work = std::move(pending_queue.front()); - pending_queue.pop_front(); - + pending_queue.pop(); lock.unlock(); - if (work.backend == AsyncShaders::Backend::OpenGL || - work.backend == AsyncShaders::Backend::GLASM) { - const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry); + if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) { + const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry); const auto scope = context->Acquire(); auto program = - OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry); + OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry); Result result{}; result.backend = work.backend; result.cpu_address = work.cpu_address; @@ -164,9 +197,9 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context result.code_b = std::move(work.code_b); result.shader_type = work.shader_type; - if (work.backend == AsyncShaders::Backend::OpenGL) { + if (work.backend == Backend::OpenGL) { result.program.opengl = std::move(program->source_program); - } else if (work.backend == AsyncShaders::Backend::GLASM) { + } else if (work.backend == Backend::GLASM) { result.program.glasm = std::move(program->assembly_program); } @@ -174,6 +207,13 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context std::unique_lock complete_lock(completed_mutex); finished_work.push_back(std::move(result)); } + } else if (work.backend == Backend::Vulkan) { + auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>( + *work.vk_device, *work.scheduler, *work.descriptor_pool, + *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings, + work.program); + + work.pp_cache->EmplacePipeline(std::move(pipeline)); } } } diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h index 2f5ee94ad..d5ae814d5 100644 --- a/src/video_core/shader/async_shaders.h +++ b/src/video_core/shader/async_shaders.h @@ -14,6 +14,10 @@ #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" namespace Core::Frontend { class EmuWindow; @@ -24,6 +28,10 @@ namespace Tegra { class GPU; } +namespace Vulkan { +class VKPipelineCache; +} + namespace VideoCommon::Shader { class AsyncShaders { @@ -31,6 +39,7 @@ public: enum class Backend { OpenGL, GLASM, + Vulkan, }; struct ResultPrograms { @@ -52,7 +61,7 @@ public: ~AsyncShaders(); /// Start up shader worker threads - void AllocateWorkers(std::size_t num_workers); + void AllocateWorkers(); /// Clear the shader queue and kill all worker threads void FreeWorkers(); @@ -76,6 +85,14 @@ public: VideoCommon::Shader::CompilerSettings compiler_settings, const VideoCommon::Shader::Registry& registry, VAddr cpu_addr); + void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device, + Vulkan::VKScheduler& scheduler, + Vulkan::VKDescriptorPool& descriptor_pool, + Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue, + Vulkan::VKRenderPassCache& renderpass_cache, + std::vector<VkDescriptorSetLayoutBinding> bindings, + Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key); + private: void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); @@ -83,16 +100,28 @@ private: bool HasWorkQueued(); struct WorkerParams { - AsyncShaders::Backend backend; - OpenGL::Device device; + Backend backend; + // For OGL + const OpenGL::Device* device; Tegra::Engines::ShaderType shader_type; u64 uid; std::vector<u64> code; std::vector<u64> code_b; u32 main_offset; VideoCommon::Shader::CompilerSettings compiler_settings; - VideoCommon::Shader::Registry registry; + std::optional<VideoCommon::Shader::Registry> registry; VAddr cpu_address; + + // For Vulkan + Vulkan::VKPipelineCache* pp_cache; + const Vulkan::VKDevice* vk_device; + Vulkan::VKScheduler* scheduler; + Vulkan::VKDescriptorPool* descriptor_pool; + Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue; + Vulkan::VKRenderPassCache* renderpass_cache; + std::vector<VkDescriptorSetLayoutBinding> bindings; + Vulkan::SPIRVProgram program; + Vulkan::GraphicsPipelineCacheKey key; }; std::condition_variable cv; @@ -101,7 +130,7 @@ private: std::atomic<bool> is_thread_exiting{}; std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; std::vector<std::thread> worker_threads; - std::deque<WorkerParams> pending_queue; + std::queue<WorkerParams> pending_queue; std::vector<AsyncShaders::Result> finished_work; Core::Frontend::EmuWindow& emu_window; }; diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index a793c803d..846a30586 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -92,7 +92,7 @@ <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string> </property> <property name="text"> - <string>Use asynchronous shader building (experimental, OpenGL or Assembly shaders only)</string> + <string>Use asynchronous shader building (experimental)</string> </property> </widget> </item> diff --git a/src/yuzu/debugger/profiler.cpp b/src/yuzu/debugger/profiler.cpp index 53049ffd6..0e26f765b 100644 --- a/src/yuzu/debugger/profiler.cpp +++ b/src/yuzu/debugger/profiler.cpp @@ -109,8 +109,7 @@ MicroProfileWidget::MicroProfileWidget(QWidget* parent) : QWidget(parent) { MicroProfileSetDisplayMode(1); // Timers screen MicroProfileInitUI(); - connect(&update_timer, &QTimer::timeout, this, - static_cast<void (MicroProfileWidget::*)()>(&MicroProfileWidget::update)); + connect(&update_timer, &QTimer::timeout, this, qOverload<>(&MicroProfileWidget::update)); } void MicroProfileWidget::paintEvent(QPaintEvent* ev) { diff --git a/src/yuzu/game_list.cpp b/src/yuzu/game_list.cpp index 62acc3720..967ef4a21 100644 --- a/src/yuzu/game_list.cpp +++ b/src/yuzu/game_list.cpp @@ -406,7 +406,7 @@ bool GameList::isEmpty() const { type == GameListItemType::SysNandDir)) { item_model->invisibleRootItem()->removeRow(child->row()); i--; - }; + } } return !item_model->invisibleRootItem()->hasChildren(); } diff --git a/src/yuzu/game_list_worker.cpp b/src/yuzu/game_list_worker.cpp index 643ca6491..c9a395222 100644 --- a/src/yuzu/game_list_worker.cpp +++ b/src/yuzu/game_list_worker.cpp @@ -374,7 +374,7 @@ void GameListWorker::run() { ScanFileSystem(ScanTarget::PopulateGameList, game_dir.path.toStdString(), game_dir.deep_scan ? 256 : 0, game_list_dir); } - }; + } emit Finished(watch_list); } diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 592993c36..3ef59fbad 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -894,6 +894,8 @@ void GMainWindow::ConnectMenuEvents() { connect(ui.action_Open_FAQ, &QAction::triggered, this, &GMainWindow::OnOpenFAQ); connect(ui.action_Restart, &QAction::triggered, this, [this] { BootGame(QString(game_path)); }); connect(ui.action_Configure, &QAction::triggered, this, &GMainWindow::OnConfigure); + connect(ui.action_Configure_Current_Game, &QAction::triggered, this, + &GMainWindow::OnConfigurePerGame); // View connect(ui.action_Single_Window_Mode, &QAction::triggered, this, @@ -1167,6 +1169,7 @@ void GMainWindow::ShutdownGame() { ui.action_Pause->setEnabled(false); ui.action_Stop->setEnabled(false); ui.action_Restart->setEnabled(false); + ui.action_Configure_Current_Game->setEnabled(false); ui.action_Report_Compatibility->setEnabled(false); ui.action_Load_Amiibo->setEnabled(false); ui.action_Capture_Screenshot->setEnabled(false); @@ -1718,26 +1721,7 @@ void GMainWindow::OnGameListOpenPerGameProperties(const std::string& file) { return; } - ConfigurePerGame dialog(this, title_id); - dialog.LoadFromFile(v_file); - auto result = dialog.exec(); - if (result == QDialog::Accepted) { - dialog.ApplyConfiguration(); - - const auto reload = UISettings::values.is_game_list_reload_pending.exchange(false); - if (reload) { - game_list->PopulateAsync(UISettings::values.game_dirs); - } - - // Do not cause the global config to write local settings into the config file - Settings::RestoreGlobalState(); - - if (!Core::System::GetInstance().IsPoweredOn()) { - config->Save(); - } - } else { - Settings::RestoreGlobalState(); - } + OpenPerGameConfiguration(title_id, file); } void GMainWindow::OnMenuLoadFile() { @@ -2066,6 +2050,7 @@ void GMainWindow::OnStartGame() { ui.action_Pause->setEnabled(true); ui.action_Stop->setEnabled(true); ui.action_Restart->setEnabled(true); + ui.action_Configure_Current_Game->setEnabled(true); ui.action_Report_Compatibility->setEnabled(true); discord_rpc->Update(); @@ -2255,6 +2240,36 @@ void GMainWindow::OnConfigure() { UpdateStatusButtons(); } +void GMainWindow::OnConfigurePerGame() { + const u64 title_id = Core::System::GetInstance().CurrentProcess()->GetTitleID(); + OpenPerGameConfiguration(title_id, game_path.toStdString()); +} + +void GMainWindow::OpenPerGameConfiguration(u64 title_id, const std::string& file_name) { + const auto v_file = Core::GetGameFileFromPath(vfs, file_name); + + ConfigurePerGame dialog(this, title_id); + dialog.LoadFromFile(v_file); + auto result = dialog.exec(); + if (result == QDialog::Accepted) { + dialog.ApplyConfiguration(); + + const auto reload = UISettings::values.is_game_list_reload_pending.exchange(false); + if (reload) { + game_list->PopulateAsync(UISettings::values.game_dirs); + } + + // Do not cause the global config to write local settings into the config file + Settings::RestoreGlobalState(); + + if (!Core::System::GetInstance().IsPoweredOn()) { + config->Save(); + } + } else { + Settings::RestoreGlobalState(); + } +} + void GMainWindow::OnLoadAmiibo() { const QString extensions{QStringLiteral("*.bin")}; const QString file_filter = tr("Amiibo File (%1);; All Files (*.*)").arg(extensions); diff --git a/src/yuzu/main.h b/src/yuzu/main.h index 73a44a3bf..64c33830d 100644 --- a/src/yuzu/main.h +++ b/src/yuzu/main.h @@ -216,6 +216,7 @@ private slots: void OnMenuInstallToNAND(); void OnMenuRecentFile(); void OnConfigure(); + void OnConfigurePerGame(); void OnLoadAmiibo(); void OnOpenYuzuFolder(); void OnAbout(); @@ -249,6 +250,7 @@ private: void ShowMouseCursor(); void OpenURL(const QUrl& url); void LoadTranslation(); + void OpenPerGameConfiguration(u64 title_id, const std::string& file_name); Ui::MainWindow ui; diff --git a/src/yuzu/main.ui b/src/yuzu/main.ui index c3a1d715e..87ea985d8 100644 --- a/src/yuzu/main.ui +++ b/src/yuzu/main.ui @@ -81,6 +81,7 @@ <addaction name="action_Restart"/> <addaction name="separator"/> <addaction name="action_Configure"/> + <addaction name="action_Configure_Current_Game"/> </widget> <widget class="QMenu" name="menu_View"> <property name="title"> @@ -287,6 +288,14 @@ <string>Capture Screenshot</string> </property> </action> + <action name="action_Configure_Current_Game"> + <property name="enabled"> + <bool>false</bool> + </property> + <property name="text"> + <string>Configure Current Game..</string> + </property> + </action> </widget> <resources/> <connections/> diff --git a/src/yuzu/uisettings.h b/src/yuzu/uisettings.h index bbfeafc55..2d2e82f15 100644 --- a/src/yuzu/uisettings.h +++ b/src/yuzu/uisettings.h @@ -29,14 +29,14 @@ extern const Themes themes; struct GameDir { QString path; - bool deep_scan; - bool expanded; + bool deep_scan = false; + bool expanded = false; bool operator==(const GameDir& rhs) const { return path == rhs.path; - }; + } bool operator!=(const GameDir& rhs) const { return !operator==(rhs); - }; + } }; struct Values { |