summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/engines/shader_bytecode.h2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp150
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h198
-rw-r--r--src/video_core/shader/decode/conversion.cpp15
-rw-r--r--src/video_core/shader/decode/texture.cpp13
6 files changed, 335 insertions, 44 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index d6a2cc8b8..dfb12cd2d 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1973,7 +1973,7 @@ private:
INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
- INST("11011111--00----", Id::TLD4S, Type::Texture, "TLD4S"),
+ INST("11011111-0------", Id::TLD4S, Type::Texture, "TLD4S"),
INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
INST("11011110011110--", Id::TXD_B, Type::Texture, "TXD_B"),
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index d1ae4be6d..0389c2143 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -399,6 +399,7 @@ public:
DeclareConstantBuffers();
DeclareGlobalMemory();
DeclareSamplers();
+ DeclareImages();
DeclarePhysicalAttributeReader();
code.AddLine("void execute_{}() {{", suffix);
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 0f8116458..d66133ad1 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -3,7 +3,7 @@
// Refer to the license.txt file included.
#include "common/assert.h"
-#include "common/logging/log.h"
+#include "common/microprofile.h"
#include "video_core/renderer_vulkan/declarations.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
@@ -11,46 +11,172 @@
namespace Vulkan {
+MICROPROFILE_DECLARE(Vulkan_WaitForWorker);
+
+void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf,
+                                           const vk::DispatchLoaderDynamic& dld) {
+    // Walk the intrusive list in recording order. Commands are created with placement new, so
+    // each one is destroyed by hand right after it has run; the successor is read first because
+    // the node is no longer usable once its destructor has been called.
+    for (Command* current = first; current != nullptr;) {
+        Command* const following = current->GetNext();
+        current->Execute(cmdbuf, dld);
+        current->~Command();
+        current = following;
+    }
+
+    // Return the chunk to its empty state so it can be recorded into again.
+    first = nullptr;
+    last = nullptr;
+    command_offset = 0;
+}
+
VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager)
- : device{device}, resource_manager{resource_manager} {
- next_fence = &resource_manager.CommitFence();
+ : device{device}, resource_manager{resource_manager}, next_fence{
+ &resource_manager.CommitFence()} {
+ AcquireNewChunk();
AllocateNewContext();
+ worker_thread = std::thread(&VKScheduler::WorkerThread, this);
}
-VKScheduler::~VKScheduler() = default;
+VKScheduler::~VKScheduler() {
+    {
+        // The worker evaluates `quit` inside cv.wait() while holding the mutex. Writing the flag
+        // under the same mutex prevents the write and the notification below from slipping in
+        // between the worker's predicate check and its going to sleep, which would otherwise
+        // leave the worker blocked forever and make join() hang.
+        std::unique_lock lock{mutex};
+        quit = true;
+    }
+    // Notify after releasing the lock so the woken worker can reacquire it immediately.
+    cv.notify_all();
+    worker_thread.join();
+}
void VKScheduler::Flush(bool release_fence, vk::Semaphore semaphore) {
SubmitExecution(semaphore);
- if (release_fence)
+ if (release_fence) {
current_fence->Release();
+ }
AllocateNewContext();
}
void VKScheduler::Finish(bool release_fence, vk::Semaphore semaphore) {
SubmitExecution(semaphore);
current_fence->Wait();
- if (release_fence)
+ if (release_fence) {
current_fence->Release();
+ }
AllocateNewContext();
}
+void VKScheduler::WaitWorker() {
+    MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
+    // Hand over whatever is still being recorded before waiting.
+    DispatchWork();
+
+    // Keep waking the worker and re-checking until the queue is drained. The check is made with
+    // the mutex held; WorkerThread keeps that mutex locked while it executes a chunk, so taking
+    // the lock here also waits for a chunk that is currently being executed.
+    for (;;) {
+        cv.notify_all();
+        std::unique_lock lock{mutex};
+        if (chunk_queue.Empty()) {
+            break;
+        }
+    }
+}
+
+void VKScheduler::DispatchWork() {
+    // Nothing recorded: keep using the current chunk instead of queueing an empty one.
+    if (!chunk->Empty()) {
+        chunk_queue.Push(std::move(chunk));
+        cv.notify_all();
+        // `chunk` was moved from above; replace it so recording can continue.
+        AcquireNewChunk();
+    }
+}
+
+void VKScheduler::RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi) {
+    // Skip the work when the requested render pass is the one already tracked as active.
+    const bool already_active =
+        state.renderpass.has_value() && renderpass_bi == *state.renderpass;
+    if (already_active) {
+        return;
+    }
+
+    // A different render pass may still be open; it has to be ended before the new one begins.
+    const bool must_end_previous = state.renderpass.has_value();
+    state.renderpass = renderpass_bi;
+
+    Record([renderpass_bi, must_end_previous](auto cmdbuf, auto& dld) {
+        if (must_end_previous) {
+            cmdbuf.endRenderPass(dld);
+        }
+        cmdbuf.beginRenderPass(renderpass_bi, vk::SubpassContents::eInline, dld);
+    });
+}
+
+void VKScheduler::RequestOutsideRenderPassOperationContext() {
+ EndRenderPass(); // Records an end-of-render-pass command if one is tracked as active; otherwise does nothing.
+}
+
+void VKScheduler::BindGraphicsPipeline(vk::Pipeline pipeline) {
+    // Only record a bind when the pipeline differs from the one tracked in the scheduler state.
+    if (state.graphics_pipeline != pipeline) {
+        state.graphics_pipeline = pipeline;
+        Record([pipeline](auto cmdbuf, auto& dld) {
+            cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, dld);
+        });
+    }
+}
+
+void VKScheduler::WorkerThread() {
+    // The lock is held for the whole loop and only released while blocked inside cv.wait().
+    std::unique_lock lock{mutex};
+    for (;;) {
+        cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; });
+        if (quit) {
+            break;
+        }
+
+        // Take the oldest queued chunk, replay its commands into the current command buffer and
+        // hand the now-empty chunk back to the reserve for reuse.
+        auto pending_chunk = std::move(chunk_queue.Front());
+        chunk_queue.Pop();
+        pending_chunk->ExecuteAll(current_cmdbuf, device.GetDispatchLoader());
+        chunk_reserve.Push(std::move(pending_chunk));
+    }
+}
+
void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
+ EndPendingOperations();
+ InvalidateState();
+ WaitWorker();
+
+ std::unique_lock lock{mutex};
+
+ const auto queue = device.GetGraphicsQueue();
const auto& dld = device.GetDispatchLoader();
current_cmdbuf.end(dld);
- const auto queue = device.GetGraphicsQueue();
- const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1u : 0u,
+ const vk::SubmitInfo submit_info(0, nullptr, nullptr, 1, &current_cmdbuf, semaphore ? 1U : 0U,
&semaphore);
- queue.submit({submit_info}, *current_fence, dld);
+ queue.submit({submit_info}, static_cast<vk::Fence>(*current_fence), dld);
}
void VKScheduler::AllocateNewContext() {
+ std::unique_lock lock{mutex};
current_fence = next_fence;
- current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
next_fence = &resource_manager.CommitFence();
- const auto& dld = device.GetDispatchLoader();
- current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit}, dld);
+ current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
+ current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},
+ device.GetDispatchLoader());
+}
+
+void VKScheduler::InvalidateState() {
+    // Clear every "already set" flag so the next Touch* call reports the state as not yet set.
+    state.stencil_values = false;
+    state.depth_bounds = false;
+    state.blend_constants = false;
+    state.depth_bias = false;
+    state.scissors = false;
+    state.viewports = false;
+    // Forget the bound pipeline so the next BindGraphicsPipeline call records a bind again.
+    state.graphics_pipeline = nullptr;
+}
+
+void VKScheduler::EndPendingOperations() {
+ EndRenderPass(); // The only pending operation tracked here is an open render pass.
+}
+
+void VKScheduler::EndRenderPass() {
+    // Only emit an end command when a render pass is tracked as active.
+    if (state.renderpass.has_value()) {
+        state.renderpass.reset();
+        Record([](auto cmdbuf, auto& dld) { cmdbuf.endRenderPass(dld); });
+    }
+}
+
+void VKScheduler::AcquireNewChunk() {
+    // Prefer recycling a chunk that has already been executed; allocate only when none is
+    // available in the reserve.
+    if (!chunk_reserve.Empty()) {
+        chunk = std::move(chunk_reserve.Front());
+        chunk_reserve.Pop();
+    } else {
+        chunk = std::make_unique<CommandChunk>();
+    }
+}
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 0e5b49c7f..bcdffbba0 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -4,7 +4,14 @@
#pragma once
+#include <array>
+#include <condition_variable>
+#include <cstddef>
+#include <memory>
+#include <mutex>
+#include <new>
+#include <optional>
+#include <stack>
+#include <thread>
+#include <utility>
#include "common/common_types.h"
+#include "common/threadsafe_queue.h"
#include "video_core/renderer_vulkan/declarations.h"
namespace Vulkan {
@@ -30,56 +37,197 @@ private:
VKFence* const& fence;
};
-class VKCommandBufferView {
+/// The scheduler abstracts command buffer and fence management with an interface that's able to do
+/// OpenGL-like operations on Vulkan command buffers.
+class VKScheduler {
public:
- VKCommandBufferView() = default;
- VKCommandBufferView(const vk::CommandBuffer& cmdbuf) : cmdbuf{cmdbuf} {}
+ explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
+ ~VKScheduler();
+
+ /// Sends the current execution context to the GPU.
+ void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr);
+
+ /// Sends the current execution context to the GPU and waits for it to complete.
+ void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr);
+
+ /// Waits for the worker thread to finish executing everything. After this function returns it's
+ /// safe to touch worker resources.
+ void WaitWorker();
+
+ /// Sends currently recorded work to the worker thread.
+ void DispatchWork();
+
+ /// Requests to begin a renderpass.
+ void RequestRenderpass(const vk::RenderPassBeginInfo& renderpass_bi);
+
+ /// Requests the current execution context to be able to execute operations only allowed outside
+ /// of a renderpass.
+ void RequestOutsideRenderPassOperationContext();
+
+ /// Binds a pipeline to the current execution context.
+ void BindGraphicsPipeline(vk::Pipeline pipeline);
- const vk::CommandBuffer* operator->() const noexcept {
- return &cmdbuf;
+ /// Returns true when viewports have been set in the current command buffer.
+ bool TouchViewports() {
+ return std::exchange(state.viewports, true);
}
- operator vk::CommandBuffer() const noexcept {
- return cmdbuf;
+ /// Returns true when scissors have been set in the current command buffer.
+ bool TouchScissors() {
+ return std::exchange(state.scissors, true);
}
-private:
- const vk::CommandBuffer& cmdbuf;
-};
+ /// Returns true when depth bias has been set in the current command buffer.
+ bool TouchDepthBias() {
+ return std::exchange(state.depth_bias, true);
+ }
-/// The scheduler abstracts command buffer and fence management with an interface that's able to do
-/// OpenGL-like operations on Vulkan command buffers.
-class VKScheduler {
-public:
- explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager);
- ~VKScheduler();
+ /// Returns true when blend constants have been set in the current command buffer.
+ bool TouchBlendConstants() {
+ return std::exchange(state.blend_constants, true);
+ }
+
+ /// Returns true when depth bounds have been set in the current command buffer.
+ bool TouchDepthBounds() {
+ return std::exchange(state.depth_bounds, true);
+ }
+
+ /// Returns true when stencil values have been set in the current command buffer.
+ bool TouchStencilValues() {
+ return std::exchange(state.stencil_values, true);
+ }
+
+    /// Records a command to be executed later on the worker thread. When the current chunk has
+    /// no room left, it is dispatched and the command is recorded into a fresh chunk instead.
+    template <typename T>
+    void Record(T&& command) {
+        if (!chunk->Record(command)) {
+            DispatchWork();
+            // The chunk is now empty or new, and CommandChunk::Record statically asserts that
+            // a single command fits in one, so the result is discarded.
+            (void)chunk->Record(command);
+        }
+    }
/// Gets a reference to the current fence.
VKFenceView GetFence() const {
return current_fence;
}
- /// Gets a reference to the current command buffer.
- VKCommandBufferView GetCommandBuffer() const {
- return current_cmdbuf;
- }
+private:
+ class Command {
+ public:
+ virtual ~Command() = default;
- /// Sends the current execution context to the GPU.
- void Flush(bool release_fence = true, vk::Semaphore semaphore = nullptr);
+ virtual void Execute(vk::CommandBuffer cmdbuf,
+ const vk::DispatchLoaderDynamic& dld) const = 0;
- /// Sends the current execution context to the GPU and waits for it to complete.
- void Finish(bool release_fence = true, vk::Semaphore semaphore = nullptr);
+ Command* GetNext() const {
+ return next;
+ }
+
+ void SetNext(Command* next_) {
+ next = next_;
+ }
+
+ private:
+ Command* next = nullptr;
+ };
+
+    /// Command implementation that stores a callable of type T and invokes it on execution.
+    template <typename T>
+    class TypedCommand final : public Command {
+    public:
+        explicit TypedCommand(T&& command_) : command{std::move(command_)} {}
+        ~TypedCommand() override = default;
+
+        // Instances are constructed in place inside a chunk and are never relocated.
+        TypedCommand(TypedCommand&&) = delete;
+        TypedCommand& operator=(TypedCommand&&) = delete;
+
+        /// Invokes the stored callable with the given command buffer and dispatch loader.
+        void Execute(vk::CommandBuffer cmdbuf,
+                     const vk::DispatchLoaderDynamic& dld) const override {
+            command(cmdbuf, dld);
+        }
+
+    private:
+        T command;
+    };
+
+    /// Fixed-size arena holding a singly linked list of recorded commands.
+    class CommandChunk final {
+    public:
+        /// Executes every recorded command in order, destroys it and resets the chunk to empty.
+        void ExecuteAll(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld);
+
+        /// Tries to move `command` into the chunk's storage and append it to the list.
+        /// @returns true when the command was stored; false when there is not enough room left,
+        /// in which case `command` is left untouched.
+        template <typename T>
+        bool Record(T& command) {
+            using FuncType = TypedCommand<T>;
+            static_assert(sizeof(FuncType) < sizeof(data), "Lambda is too large");
+            static_assert(alignof(FuncType) <= alignof(std::max_align_t),
+                          "Lambda alignment exceeds the alignment of the chunk storage");
+
+            // Commands of different types are packed back to back, so the running offset is not
+            // necessarily a multiple of this type's alignment. Round it up before constructing
+            // the object; alignments are powers of two, which makes the mask below valid.
+            constexpr std::size_t alignment = alignof(FuncType);
+            const std::size_t aligned_offset =
+                (command_offset + alignment - 1) & ~(alignment - 1);
+
+            if (aligned_offset > sizeof(data) - sizeof(FuncType)) {
+                return false;
+            }
+
+            Command* const previous_last = last;
+
+            last = new (data.data() + aligned_offset) FuncType(std::move(command));
+
+            if (previous_last) {
+                previous_last->SetNext(last);
+            } else {
+                first = last;
+            }
+
+            command_offset = aligned_offset + sizeof(FuncType);
+            return true;
+        }
+
+        /// Returns true when no command has been recorded since the last reset.
+        bool Empty() const {
+            return command_offset == 0;
+        }
+
+    private:
+        Command* first = nullptr;
+        Command* last = nullptr;
+
+        std::size_t command_offset = 0;
+        // Aligned so that an object placed at an offset that is a multiple of its alignment is
+        // correctly aligned in memory; a plain byte array only guarantees byte alignment.
+        alignas(std::max_align_t) std::array<u8, 0x8000> data{};
+    };
+
+ void WorkerThread();
-private:
void SubmitExecution(vk::Semaphore semaphore);
void AllocateNewContext();
+ void InvalidateState();
+
+ void EndPendingOperations();
+
+ void EndRenderPass();
+
+ void AcquireNewChunk();
+
const VKDevice& device;
VKResourceManager& resource_manager;
vk::CommandBuffer current_cmdbuf;
VKFence* current_fence = nullptr;
VKFence* next_fence = nullptr;
+
+ struct State {
+ std::optional<vk::RenderPassBeginInfo> renderpass;
+ vk::Pipeline graphics_pipeline;
+ bool viewports = false;
+ bool scissors = false;
+ bool depth_bias = false;
+ bool blend_constants = false;
+ bool depth_bounds = false;
+ bool stencil_values = false;
+ } state;
+
+ std::unique_ptr<CommandChunk> chunk;
+ std::thread worker_thread;
+
+ Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
+ Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
+ std::mutex mutex;
+ std::condition_variable cv;
+ bool quit = false;
};
} // namespace Vulkan
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 32facd6ba..0eeb75559 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -63,12 +63,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
case OpCode::Id::I2F_R:
case OpCode::Id::I2F_C:
case OpCode::Id::I2F_IMM: {
- UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in I2F is not implemented");
- Node value = [&]() {
+ Node value = [&] {
switch (opcode->get().GetId()) {
case OpCode::Id::I2F_R:
return GetRegister(instr.gpr20);
@@ -81,7 +80,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
return Immediate(0);
}
}();
+
const bool input_signed = instr.conversion.is_input_signed;
+
+ if (instr.conversion.src_size == Register::Size::Byte) {
+ const u32 offset = static_cast<u32>(instr.conversion.int_src.selector) * 8;
+ if (offset > 0) {
+ value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed,
+ std::move(value), Immediate(offset));
+ }
+ } else {
+ UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
+ }
+
value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 994c05611..dff01a541 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -743,13 +743,18 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
// When lod is used always is in gpr20
const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
- // Fill empty entries from the guest sampler.
+ // Fill empty entries from the guest sampler
const std::size_t entry_coord_count = GetCoordCount(sampler.GetType());
if (type_coord_count != entry_coord_count) {
LOG_WARNING(HW_GPU, "Bound and built texture types mismatch");
- }
- for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) {
- coords.push_back(GetRegister(Register::ZeroIndex));
+
+ // When the size is higher we insert zeroes
+ for (std::size_t i = type_coord_count; i < entry_coord_count; ++i) {
+ coords.push_back(GetRegister(Register::ZeroIndex));
+ }
+
+ // Then we ensure the size matches the number of entries (dropping unused values)
+ coords.resize(entry_coord_count);
}
Node4 values;