diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/CMakeLists.txt | 6 | ||||
-rw-r--r-- | src/video_core/debug_utils/debug_utils.cpp | 49 | ||||
-rw-r--r-- | src/video_core/debug_utils/debug_utils.h | 157 | ||||
-rw-r--r-- | src/video_core/engines/maxwell_3d.cpp | 31 | ||||
-rw-r--r-- | src/video_core/engines/shader_bytecode.h | 4 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/maxwell_to_gl.h | 22 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_image.cpp | 106 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_image.h | 84 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp | 127 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_staging_buffer_pool.h | 83 | ||||
-rw-r--r-- | src/video_core/shader/decode/register_set_predicate.cpp | 60 |
11 files changed, 469 insertions, 260 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index e615b238e..65d7b9f93 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -4,8 +4,6 @@ add_library(video_core STATIC buffer_cache/map_interval.h dma_pusher.cpp dma_pusher.h - debug_utils/debug_utils.cpp - debug_utils/debug_utils.h engines/const_buffer_engine_interface.h engines/const_buffer_info.h engines/engine_upload.cpp @@ -159,6 +157,8 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_buffer_cache.h renderer_vulkan/vk_device.cpp renderer_vulkan/vk_device.h + renderer_vulkan/vk_image.cpp + renderer_vulkan/vk_image.h renderer_vulkan/vk_memory_manager.cpp renderer_vulkan/vk_memory_manager.h renderer_vulkan/vk_resource_manager.cpp @@ -169,6 +169,8 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_scheduler.h renderer_vulkan/vk_shader_decompiler.cpp renderer_vulkan/vk_shader_decompiler.h + renderer_vulkan/vk_staging_buffer_pool.cpp + renderer_vulkan/vk_staging_buffer_pool.h renderer_vulkan/vk_stream_buffer.cpp renderer_vulkan/vk_stream_buffer.h renderer_vulkan/vk_swapchain.cpp diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp deleted file mode 100644 index f0ef67535..000000000 --- a/src/video_core/debug_utils/debug_utils.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. - -#include <mutex> - -#include "video_core/debug_utils/debug_utils.h" - -namespace Tegra { - -void DebugContext::DoOnEvent(Event event, void* data) { - { - std::unique_lock lock{breakpoint_mutex}; - - // TODO(Subv): Commit the rasterizer's caches so framebuffers, render targets, etc. will - // show on debug widgets - - // TODO: Should stop the CPU thread here once we multithread emulation. - - active_breakpoint = event; - at_breakpoint = true; - - // Tell all observers that we hit a breakpoint - for (auto& breakpoint_observer : breakpoint_observers) { - breakpoint_observer->OnMaxwellBreakPointHit(event, data); - } - - // Wait until another thread tells us to Resume() - resume_from_breakpoint.wait(lock, [&] { return !at_breakpoint; }); - } -} - -void DebugContext::Resume() { - { - std::lock_guard lock{breakpoint_mutex}; - - // Tell all observers that we are about to resume - for (auto& breakpoint_observer : breakpoint_observers) { - breakpoint_observer->OnMaxwellResume(); - } - - // Resume the waiting thread (i.e. OnEvent()) - at_breakpoint = false; - } - - resume_from_breakpoint.notify_one(); -} - -} // namespace Tegra diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h deleted file mode 100644 index ac3a2eb01..000000000 --- a/src/video_core/debug_utils/debug_utils.h +++ /dev/null @@ -1,157 +0,0 @@ -// Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 -// Refer to the license.txt file included. - -#pragma once - -#include <array> -#include <condition_variable> -#include <list> -#include <memory> -#include <mutex> - -namespace Tegra { - -class DebugContext { -public: - enum class Event { - FirstEvent = 0, - - MaxwellCommandLoaded = FirstEvent, - MaxwellCommandProcessed, - IncomingPrimitiveBatch, - FinishedPrimitiveBatch, - - NumEvents - }; - - /** - * Inherit from this class to be notified of events registered to some debug context. - * Most importantly this is used for our debugger GUI. - * - * To implement event handling, override the OnMaxwellBreakPointHit and OnMaxwellResume methods. - * @warning All BreakPointObservers need to be on the same thread to guarantee thread-safe state - * access - * @todo Evaluate an alternative interface, in which there is only one managing observer and - * multiple child observers running (by design) on the same thread. - */ - class BreakPointObserver { - public: - /// Constructs the object such that it observes events of the given DebugContext. - explicit BreakPointObserver(std::shared_ptr<DebugContext> debug_context) - : context_weak(debug_context) { - std::unique_lock lock{debug_context->breakpoint_mutex}; - debug_context->breakpoint_observers.push_back(this); - } - - virtual ~BreakPointObserver() { - auto context = context_weak.lock(); - if (context) { - { - std::unique_lock lock{context->breakpoint_mutex}; - context->breakpoint_observers.remove(this); - } - - // If we are the last observer to be destroyed, tell the debugger context that - // it is free to continue. In particular, this is required for a proper yuzu - // shutdown, when the emulation thread is waiting at a breakpoint. - if (context->breakpoint_observers.empty()) - context->Resume(); - } - } - - /** - * Action to perform when a breakpoint was reached. - * @param event Type of event which triggered the breakpoint - * @param data Optional data pointer (if unused, this is a nullptr) - * @note This function will perform nothing unless it is overridden in the child class. - */ - virtual void OnMaxwellBreakPointHit(Event event, void* data) {} - - /** - * Action to perform when emulation is resumed from a breakpoint. - * @note This function will perform nothing unless it is overridden in the child class. - */ - virtual void OnMaxwellResume() {} - - protected: - /** - * Weak context pointer. This need not be valid, so when requesting a shared_ptr via - * context_weak.lock(), always compare the result against nullptr. - */ - std::weak_ptr<DebugContext> context_weak; - }; - - /** - * Simple structure defining a breakpoint state - */ - struct BreakPoint { - bool enabled = false; - }; - - /** - * Static constructor used to create a shared_ptr of a DebugContext. - */ - static std::shared_ptr<DebugContext> Construct() { - return std::shared_ptr<DebugContext>(new DebugContext); - } - - /** - * Used by the emulation core when a given event has happened. If a breakpoint has been set - * for this event, OnEvent calls the event handlers of the registered breakpoint observers. - * The current thread then is halted until Resume() is called from another thread (or until - * emulation is stopped). - * @param event Event which has happened - * @param data Optional data pointer (pass nullptr if unused). Needs to remain valid until - * Resume() is called. - */ - void OnEvent(Event event, void* data) { - // This check is left in the header to allow the compiler to inline it. - if (!breakpoints[(int)event].enabled) - return; - // For the rest of event handling, call a separate function. - DoOnEvent(event, data); - } - - void DoOnEvent(Event event, void* data); - - /** - * Resume from the current breakpoint. - * @warning Calling this from the same thread that OnEvent was called in will cause a deadlock. - * Calling from any other thread is safe. - */ - void Resume(); - - /** - * Delete all set breakpoints and resume emulation. - */ - void ClearBreakpoints() { - for (auto& bp : breakpoints) { - bp.enabled = false; - } - Resume(); - } - - // TODO: Evaluate if access to these members should be hidden behind a public interface. - std::array<BreakPoint, static_cast<int>(Event::NumEvents)> breakpoints; - Event active_breakpoint{}; - bool at_breakpoint = false; - -private: - /** - * Private default constructor to make sure people always construct this through Construct() - * instead. - */ - DebugContext() = default; - - /// Mutex protecting current breakpoint state and the observer list. - std::mutex breakpoint_mutex; - - /// Used by OnEvent to wait for resumption. - std::condition_variable resume_from_breakpoint; - - /// List of registered observers - std::list<BreakPointObserver*> breakpoint_observers; -}; - -} // namespace Tegra diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index e1cb8b0b0..1d1f780e7 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -7,7 +7,6 @@ #include "common/assert.h" #include "core/core.h" #include "core/core_timing.h" -#include "video_core/debug_utils/debug_utils.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" #include "video_core/memory_manager.h" @@ -273,8 +272,6 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3 } void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { - auto debug_context = system.GetGPUDebugContext(); - const u32 method = method_call.method; if (method == cb_data_state.current) { @@ -315,10 +312,6 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register, increase the size of the Regs structure"); - if (debug_context) { - debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr); - } - if (regs.reg_array[method] != method_call.argument) { regs.reg_array[method] = method_call.argument; const std::size_t dirty_reg = dirty_pointers[method]; @@ -424,10 +417,6 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) { default: break; } - - if (debug_context) { - debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr); - } } void Maxwell3D::StepInstance(const MMEDrawMode expected_mode, const u32 count) { @@ -485,12 +474,6 @@ void Maxwell3D::FlushMMEInlineDraw() { ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); ASSERT(mme_draw.instance_count == mme_draw.gl_end_count); - auto debug_context = system.GetGPUDebugContext(); - - if (debug_context) { - debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr); - } - // Both instance configuration registers can not be set at the same time. ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont, "Illegal combination of instancing parameters"); @@ -500,10 +483,6 @@ void Maxwell3D::FlushMMEInlineDraw() { rasterizer.DrawMultiBatch(is_indexed); } - if (debug_context) { - debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr); - } - // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if // the game is trying to draw indexed or direct mode. This needs to be verified on HW still - // it's possible that it is incorrect and that there is some other register used to specify the @@ -650,12 +629,6 @@ void Maxwell3D::DrawArrays() { regs.vertex_buffer.count); ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?"); - auto debug_context = system.GetGPUDebugContext(); - - if (debug_context) { - debug_context->OnEvent(Tegra::DebugContext::Event::IncomingPrimitiveBatch, nullptr); - } - // Both instance configuration registers can not be set at the same time. ASSERT_MSG(!regs.draw.instance_next || !regs.draw.instance_cont, "Illegal combination of instancing parameters"); @@ -673,10 +646,6 @@ void Maxwell3D::DrawArrays() { rasterizer.DrawBatch(is_indexed); } - if (debug_context) { - debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr); - } - // TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if // the game is trying to draw indexed or direct mode. This needs to be verified on HW still - // it's possible that it is incorrect and that there is some other register used to specify the diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 412ca5551..57b57c647 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1051,7 +1051,7 @@ union Instruction { BitField<40, 1, R2pMode> mode; BitField<41, 2, u64> byte; BitField<20, 7, u64> immediate_mask; - } r2p; + } p2r_r2p; union { BitField<39, 3, u64> pred39; @@ -1801,6 +1801,7 @@ public: PSET, CSETP, R2P_IMM, + P2R_IMM, XMAD_IMM, XMAD_CR, XMAD_RC, @@ -2106,6 +2107,7 @@ private: INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"), INST("010100001010----", Id::CSETP, Type::PredicateSetPredicate, "CSETP"), INST("0011100-11110---", Id::R2P_IMM, Type::RegisterSetPredicate, "R2P_IMM"), + INST("0011100-11101---", Id::P2R_IMM, Type::RegisterSetPredicate, "P2R_IMM"), INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"), INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"), INST("010100010-------", Id::XMAD_RC, Type::Xmad, "XMAD_RC"), diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 9ed738171..ea4f35663 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -120,6 +120,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { return GL_POINTS; case Maxwell::PrimitiveTopology::Lines: return GL_LINES; + case Maxwell::PrimitiveTopology::LineLoop: + return GL_LINE_LOOP; case Maxwell::PrimitiveTopology::LineStrip: return GL_LINE_STRIP; case Maxwell::PrimitiveTopology::Triangles: @@ -130,11 +132,23 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { return GL_TRIANGLE_FAN; case Maxwell::PrimitiveTopology::Quads: return GL_QUADS; - default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology)); - UNREACHABLE(); - return {}; + case Maxwell::PrimitiveTopology::QuadStrip: + return GL_QUAD_STRIP; + case Maxwell::PrimitiveTopology::Polygon: + return GL_POLYGON; + case Maxwell::PrimitiveTopology::LinesAdjacency: + return GL_LINES_ADJACENCY; + case Maxwell::PrimitiveTopology::LineStripAdjacency: + return GL_LINE_STRIP_ADJACENCY; + case Maxwell::PrimitiveTopology::TrianglesAdjacency: + return GL_TRIANGLES_ADJACENCY; + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + return GL_TRIANGLE_STRIP_ADJACENCY; + case Maxwell::PrimitiveTopology::Patches: + return GL_PATCHES; } + UNREACHABLE_MSG("Invalid topology={}", static_cast<int>(topology)); + return GL_POINTS; } inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, diff --git a/src/video_core/renderer_vulkan/vk_image.cpp b/src/video_core/renderer_vulkan/vk_image.cpp new file mode 100644 index 000000000..4bcbef959 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_image.cpp @@ -0,0 +1,106 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <memory> +#include <vector> + +#include "common/assert.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_image.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" + +namespace Vulkan { + +VKImage::VKImage(const VKDevice& device, VKScheduler& scheduler, + const vk::ImageCreateInfo& image_ci, vk::ImageAspectFlags aspect_mask) + : device{device}, scheduler{scheduler}, format{image_ci.format}, aspect_mask{aspect_mask}, + image_num_layers{image_ci.arrayLayers}, image_num_levels{image_ci.mipLevels} { + UNIMPLEMENTED_IF_MSG(image_ci.queueFamilyIndexCount != 0, + "Queue family tracking is not implemented"); + + const auto dev = device.GetLogical(); + image = dev.createImageUnique(image_ci, nullptr, device.GetDispatchLoader()); + + const u32 num_ranges = image_num_layers * image_num_levels; + barriers.resize(num_ranges); + subrange_states.resize(num_ranges, {{}, image_ci.initialLayout}); +} + +VKImage::~VKImage() = default; + +void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, + vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access, + vk::ImageLayout new_layout) { + if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) { + return; + } + + std::size_t cursor = 0; + for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) { + for (u32 level_it = 0; level_it < num_levels; ++level_it, ++cursor) { + const u32 layer = base_layer + layer_it; + const u32 level = base_level + level_it; + auto& state = GetSubrangeState(layer, level); + barriers[cursor] = vk::ImageMemoryBarrier( + state.access, new_access, state.layout, new_layout, VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, *image, {aspect_mask, level, 1, layer, 1}); + state.access = new_access; + state.layout = new_layout; + } + } + + scheduler.RequestOutsideRenderPassOperationContext(); + + scheduler.Record([barriers = barriers, cursor](auto cmdbuf, auto& dld) { + // TODO(Rodrigo): Implement a way to use the latest stage across subresources. + constexpr auto stage_stub = vk::PipelineStageFlagBits::eAllCommands; + cmdbuf.pipelineBarrier(stage_stub, stage_stub, {}, 0, nullptr, 0, nullptr, + static_cast<u32>(cursor), barriers.data(), dld); + }); +} + +bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, + vk::AccessFlags new_access, vk::ImageLayout new_layout) noexcept { + const bool is_full_range = base_layer == 0 && num_layers == image_num_layers && + base_level == 0 && num_levels == image_num_levels; + if (!is_full_range) { + state_diverged = true; + } + + if (!state_diverged) { + auto& state = GetSubrangeState(0, 0); + if (state.access != new_access || state.layout != new_layout) { + return true; + } + } + + for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) { + for (u32 level_it = 0; level_it < num_levels; ++level_it) { + const u32 layer = base_layer + layer_it; + const u32 level = base_level + level_it; + auto& state = GetSubrangeState(layer, level); + if (state.access != new_access || state.layout != new_layout) { + return true; + } + } + } + return false; +} + +void VKImage::CreatePresentView() { + // Image type has to be 2D to be presented. + const vk::ImageViewCreateInfo image_view_ci({}, *image, vk::ImageViewType::e2D, format, {}, + {aspect_mask, 0, 1, 0, 1}); + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + present_view = dev.createImageViewUnique(image_view_ci, nullptr, dld); +} + +VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept { + return subrange_states[static_cast<std::size_t>(layer * image_num_levels) + + static_cast<std::size_t>(level)]; +} + +} // namespace Vulkan
\ No newline at end of file diff --git a/src/video_core/renderer_vulkan/vk_image.h b/src/video_core/renderer_vulkan/vk_image.h new file mode 100644 index 000000000..b78242512 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_image.h @@ -0,0 +1,84 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <vector> + +#include "common/common_types.h" +#include "video_core/renderer_vulkan/declarations.h" + +namespace Vulkan { + +class VKDevice; +class VKScheduler; + +class VKImage { +public: + explicit VKImage(const VKDevice& device, VKScheduler& scheduler, + const vk::ImageCreateInfo& image_ci, vk::ImageAspectFlags aspect_mask); + ~VKImage(); + + /// Records in the passed command buffer an image transition and updates the state of the image. + void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, + vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access, + vk::ImageLayout new_layout); + + /// Returns a view compatible with presentation, the image has to be 2D. + vk::ImageView GetPresentView() { + if (!present_view) { + CreatePresentView(); + } + return *present_view; + } + + /// Returns the Vulkan image handler. + vk::Image GetHandle() const { + return *image; + } + + /// Returns the Vulkan format for this image. + vk::Format GetFormat() const { + return format; + } + + /// Returns the Vulkan aspect mask. + vk::ImageAspectFlags GetAspectMask() const { + return aspect_mask; + } + +private: + struct SubrangeState final { + vk::AccessFlags access{}; ///< Current access bits. + vk::ImageLayout layout = vk::ImageLayout::eUndefined; ///< Current image layout. + }; + + bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, + vk::AccessFlags new_access, vk::ImageLayout new_layout) noexcept; + + /// Creates a presentation view. + void CreatePresentView(); + + /// Returns the subrange state for a layer and layer. + SubrangeState& GetSubrangeState(u32 layer, u32 level) noexcept; + + const VKDevice& device; ///< Device handler. + VKScheduler& scheduler; ///< Device scheduler. + + const vk::Format format; ///< Vulkan format. + const vk::ImageAspectFlags aspect_mask; ///< Vulkan aspect mask. + const u32 image_num_layers; ///< Number of layers. + const u32 image_num_levels; ///< Number of mipmap levels. + + UniqueImage image; ///< Image handle. + UniqueImageView present_view; ///< Image view compatible with presentation. + + std::vector<vk::ImageMemoryBarrier> barriers; ///< Pool of barriers. + std::vector<SubrangeState> subrange_states; ///< Current subrange state. + + bool state_diverged = false; ///< True when subresources mismatch in layout. +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp new file mode 100644 index 000000000..171d78afc --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -0,0 +1,127 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <unordered_map> +#include <utility> +#include <vector> + +#include "common/bit_util.h" +#include "common/common_types.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" + +namespace Vulkan { + +VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence, + u64 last_epoch) + : buffer{std::move(buffer)}, watch{fence}, last_epoch{last_epoch} {} + +VKStagingBufferPool::StagingBuffer::StagingBuffer(StagingBuffer&& rhs) noexcept { + buffer = std::move(rhs.buffer); + watch = std::move(rhs.watch); + last_epoch = rhs.last_epoch; +} + +VKStagingBufferPool::StagingBuffer::~StagingBuffer() = default; + +VKStagingBufferPool::StagingBuffer& VKStagingBufferPool::StagingBuffer::operator=( + StagingBuffer&& rhs) noexcept { + buffer = std::move(rhs.buffer); + watch = std::move(rhs.watch); + last_epoch = rhs.last_epoch; + return *this; +} + +VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler) + : device{device}, memory_manager{memory_manager}, scheduler{scheduler}, + is_device_integrated{device.IsIntegrated()} {} + +VKStagingBufferPool::~VKStagingBufferPool() = default; + +VKBuffer& VKStagingBufferPool::GetUnusedBuffer(std::size_t size, bool host_visible) { + if (const auto buffer = TryGetReservedBuffer(size, host_visible)) { + return *buffer; + } + return CreateStagingBuffer(size, host_visible); +} + +void VKStagingBufferPool::TickFrame() { + ++epoch; + current_delete_level = (current_delete_level + 1) % NumLevels; + + ReleaseCache(true); + if (!is_device_integrated) { + ReleaseCache(false); + } +} + +VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_visible) { + for (auto& entry : GetCache(host_visible)[Common::Log2Ceil64(size)].entries) { + if (entry.watch.TryWatch(scheduler.GetFence())) { + entry.last_epoch = epoch; + return &*entry.buffer; + } + } + return nullptr; +} + +VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) { + const auto usage = + vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | + vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eIndexBuffer; + const u32 log2 = Common::Log2Ceil64(size); + const vk::BufferCreateInfo buffer_ci({}, 1ULL << log2, usage, vk::SharingMode::eExclusive, 0, + nullptr); + const auto dev = device.GetLogical(); + auto buffer = std::make_unique<VKBuffer>(); + buffer->handle = dev.createBufferUnique(buffer_ci, nullptr, device.GetDispatchLoader()); + buffer->commit = memory_manager.Commit(*buffer->handle, host_visible); + + auto& entries = GetCache(host_visible)[log2].entries; + return *entries.emplace_back(std::move(buffer), scheduler.GetFence(), epoch).buffer; +} + +VKStagingBufferPool::StagingBuffersCache& VKStagingBufferPool::GetCache(bool host_visible) { + return is_device_integrated || host_visible ? host_staging_buffers : device_staging_buffers; +} + +void VKStagingBufferPool::ReleaseCache(bool host_visible) { + auto& cache = GetCache(host_visible); + const u64 size = ReleaseLevel(cache, current_delete_level); + if (size == 0) { + return; + } +} + +u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t log2) { + static constexpr u64 epochs_to_destroy = 180; + static constexpr std::size_t deletions_per_tick = 16; + + auto& staging = cache[log2]; + auto& entries = staging.entries; + const std::size_t old_size = entries.size(); + + const auto is_deleteable = [this](const auto& entry) { + return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed(); + }; + const std::size_t begin_offset = staging.delete_index; + const std::size_t end_offset = std::min(begin_offset + deletions_per_tick, old_size); + const auto begin = std::begin(entries) + begin_offset; + const auto end = std::begin(entries) + end_offset; + entries.erase(std::remove_if(begin, end, is_deleteable), end); + + const std::size_t new_size = entries.size(); + staging.delete_index += deletions_per_tick; + if (staging.delete_index >= new_size) { + staging.delete_index = 0; + } + + return (1ULL << log2) * (old_size - new_size); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h new file mode 100644 index 000000000..02310375f --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -0,0 +1,83 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <climits> +#include <unordered_map> +#include <utility> +#include <vector> + +#include "common/common_types.h" + +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_memory_manager.h" + +namespace Vulkan { + +class VKDevice; +class VKFenceWatch; +class VKScheduler; + +struct VKBuffer final { + UniqueBuffer handle; + VKMemoryCommit commit; +}; + +class VKStagingBufferPool final { +public: + explicit VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler); + ~VKStagingBufferPool(); + + VKBuffer& GetUnusedBuffer(std::size_t size, bool host_visible); + + void TickFrame(); + +private: + struct StagingBuffer final { + explicit StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence, u64 last_epoch); + StagingBuffer(StagingBuffer&& rhs) noexcept; + StagingBuffer(const StagingBuffer&) = delete; + ~StagingBuffer(); + + StagingBuffer& operator=(StagingBuffer&& rhs) noexcept; + + std::unique_ptr<VKBuffer> buffer; + VKFenceWatch watch; + u64 last_epoch = 0; + }; + + struct StagingBuffers final { + std::vector<StagingBuffer> entries; + std::size_t delete_index = 0; + }; + + static constexpr std::size_t NumLevels = sizeof(std::size_t) * CHAR_BIT; + using StagingBuffersCache = std::array<StagingBuffers, NumLevels>; + + VKBuffer* TryGetReservedBuffer(std::size_t size, bool host_visible); + + VKBuffer& CreateStagingBuffer(std::size_t size, bool host_visible); + + StagingBuffersCache& GetCache(bool host_visible); + + void ReleaseCache(bool host_visible); + + u64 ReleaseLevel(StagingBuffersCache& cache, std::size_t log2); + + const VKDevice& device; + VKMemoryManager& memory_manager; + VKScheduler& scheduler; + const bool is_device_integrated; + + StagingBuffersCache host_staging_buffers; + StagingBuffersCache device_staging_buffers; + + u64 epoch = 0; + + std::size_t current_delete_level = 0; +}; + +} // namespace Vulkan diff --git a/src/video_core/shader/decode/register_set_predicate.cpp b/src/video_core/shader/decode/register_set_predicate.cpp index e6c9d287e..8d54cce34 100644 --- a/src/video_core/shader/decode/register_set_predicate.cpp +++ b/src/video_core/shader/decode/register_set_predicate.cpp @@ -13,37 +13,65 @@ namespace VideoCommon::Shader { using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +namespace { +constexpr u64 NUM_PROGRAMMABLE_PREDICATES = 7; +} + u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - UNIMPLEMENTED_IF(instr.r2p.mode != Tegra::Shader::R2pMode::Pr); + UNIMPLEMENTED_IF(instr.p2r_r2p.mode != Tegra::Shader::R2pMode::Pr); - const Node apply_mask = [&]() { + const Node apply_mask = [&] { switch (opcode->get().GetId()) { case OpCode::Id::R2P_IMM: - return Immediate(static_cast<u32>(instr.r2p.immediate_mask)); + case OpCode::Id::P2R_IMM: + return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask)); default: UNREACHABLE(); - return Immediate(static_cast<u32>(instr.r2p.immediate_mask)); + return Immediate(0); } }(); - const Node mask = GetRegister(instr.gpr8); - const auto offset = static_cast<u32>(instr.r2p.byte) * 8; - constexpr u32 programmable_preds = 7; - for (u64 pred = 0; pred < programmable_preds; ++pred) { - const auto shift = static_cast<u32>(pred); + const auto offset = static_cast<u32>(instr.p2r_r2p.byte) * 8; + + switch (opcode->get().GetId()) { + case OpCode::Id::R2P_IMM: { + const Node mask = GetRegister(instr.gpr8); - const Node apply_compare = BitfieldExtract(apply_mask, shift, 1); - const Node condition = - Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0)); + for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) { + const auto shift = static_cast<u32>(pred); - const Node value_compare = BitfieldExtract(mask, offset + shift, 1); - const Node value = Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0)); + const Node apply_compare = BitfieldExtract(apply_mask, shift, 1); + const Node condition = + Operation(OperationCode::LogicalUNotEqual, apply_compare, Immediate(0)); - const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value); - bb.push_back(Conditional(condition, {code})); + const Node value_compare = BitfieldExtract(mask, offset + shift, 1); + const Node value = + Operation(OperationCode::LogicalUNotEqual, value_compare, Immediate(0)); + + const Node code = Operation(OperationCode::LogicalAssign, GetPredicate(pred), value); + bb.push_back(Conditional(condition, {code})); + } + break; + } + case OpCode::Id::P2R_IMM: { + Node value = Immediate(0); + for (u64 pred = 0; pred < NUM_PROGRAMMABLE_PREDICATES; ++pred) { + Node bit = Operation(OperationCode::Select, GetPredicate(pred), Immediate(1U << pred), + Immediate(0)); + value = Operation(OperationCode::UBitwiseOr, std::move(value), std::move(bit)); + } + value = Operation(OperationCode::UBitwiseAnd, std::move(value), apply_mask); + value = BitfieldInsert(GetRegister(instr.gpr8), std::move(value), offset, 8); + + SetRegister(bb, instr.gpr0, std::move(value)); + break; + } + default: + UNIMPLEMENTED_MSG("Unhandled P2R/R2R instruction: {}", opcode->get().GetName()); + break; } return pc; |