author: bunnei <bunneidev@gmail.com> 2019-12-19 04:04:08 +0100
committer: GitHub <noreply@github.com> 2019-12-19 04:04:08 +0100
commit: d53cf05513947d29c48c3b6ade4f92326b4f0a02 (patch)
tree: 0f49fa41e3df929a7092a88815689a6162b60142 /src/video_core
parent: Merge pull request #3227 from amilajack/patch-1 (diff)
parent: vk_scheduler: Delegate commands to a worker thread and state track (diff)
download: yuzu-d53cf05513947d29c48c3b6ade4f92326b4f0a02.tar
yuzu-d53cf05513947d29c48c3b6ade4f92326b4f0a02.tar.gz
yuzu-d53cf05513947d29c48c3b6ade4f92326b4f0a02.tar.bz2
yuzu-d53cf05513947d29c48c3b6ade4f92326b4f0a02.tar.lz
yuzu-d53cf05513947d29c48c3b6ade4f92326b4f0a02.tar.xz
yuzu-d53cf05513947d29c48c3b6ade4f92326b4f0a02.tar.zst
yuzu-d53cf05513947d29c48c3b6ade4f92326b4f0a02.zip
2 files changed, 311 insertions, 37 deletions
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 0f8116458..d66133ad1 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -3,7 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "common/assert.h"
-#include "common/logging/log.h"
+#include "common/microprofile.h"
 #include "video_core/renderer_vulkan/declarations.h"
 #include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_resource_manager.h"
@@ -11,46 +11,172 @@
 
 namespace Vulkan {
 
+MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
+
+void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
+                                           const vk::DispatchLoaderDynamic& dld) {
+    auto command = first;
+    while (command != nullptr) {
+        auto next = command->GetNext();
+        command->Execute(cmdbuf, dld);
+        command->~Command();
+        command = next;
+    }
+
+    command_offset = 0;
+    first = nullptr;
+    last = nullptr;
+}
+
 VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
-    : device{device}, resource_manager{resource_manager} {
-    next_fence = &resource_manager.CommitFence();
+    : device{device}, resource_manager{resource_manager}, next_fence{
+                                                              &resource_manager.CommitFence()} {
+    AcquireNewChunk();
     AllocateNewContext();
+    worker_thread = std::thread(&VKScheduler::WorkerThread, this);
 }
 
-VKScheduler::~VKScheduler() = default;
+VKScheduler::~VKScheduler() {
+    quit = true;
+    cv.notify_all();
+    worker_thread.join();
+}
 
 void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) {
     SubmitExecution(semaphore);
-    if (release_fence)
+    if (release_fence) {
         current_fence->Release();
+    }
     AllocateNewContext();
 }
 
 void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) {
     SubmitExecution(semaphore);
     current_fence->Wait();
-    if (release_fence)
+    if (release_fence) {
         current_fence->Release();
+    }
     AllocateNewContext();
 }
 
+void VKScheduler::WaitWorker() {
+    MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
+    DispatchWork();
+
+    bool finished = false;
+    do {
+        cv.notify_all();
+        std::unique_lock lock{mutex};
+        finished = chunk_queue.Empty();
+    } while (!finished);
+}
+
+void VKScheduler::DispatchWork() {
+    if (chunk->Empty()) {
+        return;
+    }
+    chunk_queue.Push(std::move(chunk));
+    cv.notify_all();
+    AcquireNewChunk();
+}
+
+void VKScheduler::RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi) {
+    if (state.renderpass && renderpass_bi == *state.renderpass) {
+        return;
+    }
+    const bool end_renderpass = state.renderpass.has_value();
+    state.renderpass = renderpass_bi;
+    Record([renderpass_bi, end_renderpass](auto cmdbuf, auto& dld) {
+        if (end_renderpass) {
+            cmdbuf.endRenderPass(dld);
+        }
+        cmdbuf.beginRenderPass(renderpass_bi, vk::SubpassContents::eInline, dld);
+    });
+}
+
+void VKScheduler::RequestOutsideRenderPassOperationContext() {
+    EndRenderPass();
+}
+
+void VKScheduler::BindGraphicsPipeline(vk::Pipeline pipeline) {
+    if (state.graphics_pipeline == pipeline) {
+        return;
+    }
+    state.graphics_pipeline = pipeline;
+    Record([pipeline](auto cmdbuf, auto& dld) {
+        cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, dld);
+    });
+}
+
+void VKScheduler::WorkerThread() {
+    std::unique_lock lock{mutex};
+    do {
+        cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; });
+        if (quit) {
+            continue;
+        }
+        auto extracted_chunk = std::move(chunk_queue.Front());
+        chunk_queue.Pop();
+        extracted_chunk->ExecuteAll(current_cmdbuf, device.GetDispatchLoader());
+        chunk_reserve.Push(std::move(extracted_chunk));
+    } while (!quit);
+}
+
 void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
+    EndPendingOperations();
+    InvalidateState();
+    WaitWorker();
+
+    std::unique_lock lock{mutex};
+
+    const auto queue = device.GetGraphicsQueue();
     const auto& dld = device.GetDispatchLoader();
     current_cmdbuf.end(dld);
 
-    const auto queue = device.GetGraphicsQueue();
-    const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1u : 0u,
+    const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1U : 0U,
                                      &semaphore);
-    queue.submit({submit_info}, *current_fence, dld);
+    queue.submit({submit_info}, static_cast<vk::Fence>(*current_fence), dld);
 }
 
 void VKScheduler::AllocateNewContext() {
+    std::unique_lock lock{mutex};
     current_fence = next_fence;
-    current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
     next_fence = &resource_manager.CommitFence();
 
-    const auto& dld = device.GetDispatchLoader();
-    current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, dld);
+    current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
+    current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},
+                         device.GetDispatchLoader());
+}
+
+void VKScheduler::InvalidateState() {
+    state.graphics_pipeline = nullptr;
+    state.viewports = false;
+    state.scissors = false;
+    state.depth_bias = false;
+    state.blend_constants = false;
+    state.depth_bounds = false;
+    state.stencil_values = false;
+}
+
+void VKScheduler::EndPendingOperations() {
+    EndRenderPass();
+}
+
+void VKScheduler::EndRenderPass() {
+    if (!state.renderpass) {
+        return;
+    }
+    state.renderpass = std::nullopt;
+    Record([](auto cmdbuf, auto& dld) { cmdbuf.endRenderPass(dld); });
+}
+
+void VKScheduler::AcquireNewChunk() {
+    if (chunk_reserve.Empty()) {
+        chunk = std::make_unique<CommandChunk>();
+        return;
+    }
+    chunk = std::move(chunk_reserve.Front());
+    chunk_reserve.Pop();
 }
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 0e5b49c7f..bcdffbba0 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -4,7 +4,14 @@
 
 #pragma once
 
+#include <condition_variable>
+#include <memory>
+#include <optional>
+#include <stack>
+#include <thread>
+#include <utility>
 #include "common/common_types.h"
+#include "common/threadsafe_queue.h"
 #include "video_core/renderer_vulkan/declarations.h"
 
 namespace Vulkan {
@@ -30,56 +37,197 @@ private:
     VKFence* const& fence;
 };
 
-class VKCommandBufferView {
+/// The scheduler abstracts command buffer and fence management with an interface that's able to do
+/// OpenGL-like operations on Vulkan command buffers.
+class VKScheduler {
 public:
-    VKCommandBufferView() = default;
-    VKCommandBufferView(const vk::CommandBuffer& cmdbuf) : cmdbuf{cmdbuf} {}
+    explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
+    ~VKScheduler();
+
+    /// Sends the current execution context to the GPU.
+    void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr);
+
+    /// Sends the current execution context to the GPU and waits for it to complete.
+    void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr);
+
+    /// Waits for the worker thread to finish executing everything. After this function returns it's
+    /// safe to touch worker resources.
+    void WaitWorker();
+
+    /// Sends currently recorded work to the worker thread.
+    void DispatchWork();
+
+    /// Requests to begin a renderpass.
+    void RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi);
+
+    /// Requests the current execution context to be able to execute operations only allowed outside
+    /// of a renderpass.
+    void RequestOutsideRenderPassOperationContext();
+
+    /// Binds a pipeline to the current execution context.
+    void BindGraphicsPipeline(vk::Pipeline pipeline);
 
-    const vk::CommandBuffer* operator->() const noexcept {
-        return &cmdbuf;
+    /// Returns true when viewports have been set in the current command buffer.
+    bool TouchViewports() {
+        return std::exchange(state.viewports, true);
     }
 
-    operator vk::CommandBuffer() const noexcept {
-        return cmdbuf;
+    /// Returns true when scissors have been set in the current command buffer.
+    bool TouchScissors() {
+        return std::exchange(state.scissors, true);
     }
 
-private:
-    const vk::CommandBuffer& cmdbuf;
-};
+    /// Returns true when depth bias has been set in the current command buffer.
+    bool TouchDepthBias() {
+        return std::exchange(state.depth_bias, true);
+    }
 
-/// The scheduler abstracts command buffer and fence management with an interface that's able to do
-/// OpenGL-like operations on Vulkan command buffers.
-class VKScheduler {
-public:
-    explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
-    ~VKScheduler();
+    /// Returns true when blend constants have been set in the current command buffer.
+    bool TouchBlendConstants() {
+        return std::exchange(state.blend_constants, true);
+    }
+
+    /// Returns true when depth bounds have been set in the current command buffer.
+    bool TouchDepthBounds() {
+        return std::exchange(state.depth_bounds, true);
+    }
+
+    /// Returns true when stencil values have been set in the current command buffer.
+    bool TouchStencilValues() {
+        return std::exchange(state.stencil_values, true);
+    }
+
+    /// Records a command into the current chunk; full chunks are sent to the worker thread.
+    template <typename T>
+    void Record(T&& command) {
+        if (chunk->Record(command)) {
+            return;
+        }
+        DispatchWork();
+        (void)chunk->Record(command);
+    }
 
     /// Gets a reference to the current fence.
     VKFenceView GetFence() const {
         return current_fence;
     }
 
-    /// Gets a reference to the current command buffer.
-    VKCommandBufferView GetCommandBuffer() const {
-        return current_cmdbuf;
-    }
+private:
+    class Command {
+    public:
+        virtual ~Command() = default;
 
-    /// Sends the current execution context to the GPU.
-    void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr);
+        virtual void Execute(vk::CommandBuffer cmdbuf,
+                             const vk::DispatchLoaderDynamic& dld) const = 0;
 
-    /// Sends the current execution context to the GPU and waits for it to complete.
-    void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr);
+        Command* GetNext() const {
+            return next;
+        }
+
+        void SetNext(Command* next_) {
+            next = next_;
+        }
+
+    private:
+        Command* next = nullptr;
+    };
+
+    template <typename T>
+    class TypedCommand final : public Command {
+    public:
+        explicit TypedCommand(T&& command) : command{std::move(command)} {}
+        ~TypedCommand() override = default;
+
+        TypedCommand(TypedCommand&&) = delete;
+        TypedCommand& operator=(TypedCommand&&) = delete;
+
+        void Execute(vk::CommandBuffer cmdbuf,
+                     const vk::DispatchLoaderDynamic& dld) const override {
+            command(cmdbuf, dld);
+        }
+
+    private:
+        T command;
+    };
+
+    class CommandChunk final {
+    public:
+        void ExecuteAll(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld);
+
+        template <typename T>
+        bool Record(T& command) {
+            using FuncType = TypedCommand<T>;
+            static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
+
+            if (command_offset > sizeof(data) - sizeof(FuncType)) {
+                return false;
+            }
+
+            Command* current_last = last;
+
+            last = new (data.data() + command_offset) FuncType(std::move(command));
+
+            if (current_last) {
+                current_last->SetNext(last);
+            } else {
+                first = last;
+            }
+
+            command_offset += sizeof(FuncType);
+            return true;
+        }
+
+        bool Empty() const {
+            return command_offset == 0;
+        }
+
+    private:
+        Command* first = nullptr;
+        Command* last = nullptr;
+
+        std::size_t command_offset = 0;
+        std::array<u8, 0x8000> data{};
+    };
+
+    void WorkerThread();
 
-private:
     void SubmitExecution(vk::Semaphore semaphore);
 
     void AllocateNewContext();
 
+    void InvalidateState();
+
+    void EndPendingOperations();
+
+    void EndRenderPass();
+
+    void AcquireNewChunk();
+
     const VKDevice& device;
     VKResourceManager& resource_manager;
     vk::CommandBuffer current_cmdbuf;
     VKFence* current_fence = nullptr;
     VKFence* next_fence = nullptr;
+
+    struct State {
+        std::optional<vk::RenderPassBeginInfo> renderpass;
+        vk::Pipeline graphics_pipeline;
+        bool viewports = false;
+        bool scissors = false;
+        bool depth_bias = false;
+        bool blend_constants = false;
+        bool depth_bounds = false;
+        bool stencil_values = false;
+    } state;
+
+    std::unique_ptr<CommandChunk> chunk;
+    std::thread worker_thread;
+
+    Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
+    Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
+    std::mutex mutex;
+    std::condition_variable cv;
+    bool quit = false;
 };
 
 } // namespace Vulkan
author	bunnei <bunneidev@gmail.com>	2019-12-19 04:04:08 +0100
committer	GitHub <noreply@github.com>	2019-12-19 04:04:08 +0100
commit	d53cf05513947d29c48c3b6ade4f92326b4f0a02 (patch)
tree	0f49fa41e3df929a7092a88815689a6162b60142 /src/video_core
parent	Merge pull request #3227 from amilajack/patch-1 (diff)
parent	vk_scheduler: Delegate commands to a worker thread and state track (diff)
download	yuzu-d53cf05513947d29c48c3b6ade4f92326b4f0a02.tar yuzu-d53cf05513947d29c48c3b6ade4f92326b4f0a02.tar.gz yuzu-d53cf05513947d29c48c3b6ade4f92326b4f0a02.tar.bz2 yuzu-d53cf05513947d29c48c3b6ade4f92326b4f0a02.tar.lz yuzu-d53cf05513947d29c48c3b6ade4f92326b4f0a02.tar.xz yuzu-d53cf05513947d29c48c3b6ade4f92326b4f0a02.tar.zst yuzu-d53cf05513947d29c48c3b6ade4f92326b4f0a02.zip