diff options
Diffstat (limited to '')
90 files changed, 3667 insertions, 3497 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 570acb193..eb8f643a2 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -775,6 +775,9 @@ add_library(core STATIC hle/service/nvnflinger/graphic_buffer_producer.h hle/service/nvnflinger/hos_binder_driver_server.cpp hle/service/nvnflinger/hos_binder_driver_server.h + hle/service/nvnflinger/hardware_composer.cpp + hle/service/nvnflinger/hardware_composer.h + hle/service/nvnflinger/hwc_layer.h hle/service/nvnflinger/nvnflinger.cpp hle/service/nvnflinger/nvnflinger.h hle/service/nvnflinger/parcel.h diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index c1ebbd62d..abe95303e 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -1,6 +1,8 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include <boost/container/small_vector.hpp> + #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" @@ -38,19 +40,30 @@ NvResult nvdisp_disp0::Ioctl3(DeviceFD fd, Ioctl command, std::span<const u8> in void nvdisp_disp0::OnOpen(NvCore::SessionId session_id, DeviceFD fd) {} void nvdisp_disp0::OnClose(DeviceFD fd) {} -void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, - u32 height, u32 stride, android::BufferTransformFlags transform, - const Common::Rectangle<int>& crop_rect, - std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) { - const DAddr addr = nvmap.GetHandleAddress(buffer_handle); - LOG_TRACE(Service, - "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", - addr, offset, width, height, stride, format); +void nvdisp_disp0::Composite(std::span<const Nvnflinger::HwcLayer> sorted_layers) { + std::vector<Tegra::FramebufferConfig> output_layers; + std::vector<Service::Nvidia::NvFence> output_fences; + output_layers.reserve(sorted_layers.size()); + output_fences.reserve(sorted_layers.size()); + + for (auto& layer : sorted_layers) { + output_layers.emplace_back(Tegra::FramebufferConfig{ + .address = nvmap.GetHandleAddress(layer.buffer_handle), + .offset = layer.offset, + .width = layer.width, + .height = layer.height, + .stride = layer.stride, + .pixel_format = layer.format, + .transform_flags = layer.transform, + .crop_rect = layer.crop_rect, + }); - const Tegra::FramebufferConfig framebuffer{addr, offset, width, height, - stride, format, transform, crop_rect}; + for (size_t i = 0; i < layer.acquire_fence.num_fences; i++) { + output_fences.push_back(layer.acquire_fence.fences[i]); + } + } - system.GPU().RequestSwapBuffers(&framebuffer, fences, num_fences); + system.GPU().RequestComposite(std::move(output_layers), std::move(output_fences)); system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs()); system.GetPerfStats().EndSystemFrame(); system.GetPerfStats().BeginSystemFrame(); diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h index 5f13a50a2..1082b85c2 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h @@ -8,8 +8,7 @@ #include "common/common_types.h" #include "common/math_util.h" #include "core/hle/service/nvdrv/devices/nvdevice.h" -#include "core/hle/service/nvnflinger/buffer_transform_flags.h" -#include "core/hle/service/nvnflinger/pixel_format.h" +#include "core/hle/service/nvnflinger/hwc_layer.h" namespace Service::Nvidia::NvCore { class Container; @@ -35,11 +34,8 @@ public: void OnOpen(NvCore::SessionId session_id, DeviceFD fd) override; void OnClose(DeviceFD fd) override; - /// Performs a screen flip, drawing the buffer pointed to by the handle. - void flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, u32 height, - u32 stride, android::BufferTransformFlags transform, - const Common::Rectangle<int>& crop_rect, - std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences); + /// Performs a screen flip, compositing each buffer. + void Composite(std::span<const Nvnflinger::HwcLayer> sorted_layers); Kernel::KEvent* QueryEvent(u32 event_id) override; diff --git a/src/core/hle/service/nvnflinger/hardware_composer.cpp b/src/core/hle/service/nvnflinger/hardware_composer.cpp new file mode 100644 index 000000000..c720dd1f8 --- /dev/null +++ b/src/core/hle/service/nvnflinger/hardware_composer.cpp @@ -0,0 +1,215 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#include <boost/container/small_vector.hpp> + +#include "common/microprofile.h" +#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" +#include "core/hle/service/nvnflinger/buffer_item.h" +#include "core/hle/service/nvnflinger/buffer_item_consumer.h" +#include "core/hle/service/nvnflinger/buffer_queue_producer.h" +#include "core/hle/service/nvnflinger/hardware_composer.h" +#include "core/hle/service/nvnflinger/hwc_layer.h" +#include "core/hle/service/nvnflinger/ui/graphic_buffer.h" +#include "core/hle/service/vi/display/vi_display.h" +#include "core/hle/service/vi/layer/vi_layer.h" + +namespace Service::Nvnflinger { + +namespace { + +s32 NormalizeSwapInterval(f32* out_speed_scale, s32 swap_interval) { + if (swap_interval <= 0) { + // As an extension, treat nonpositive swap interval as speed multiplier. + if (out_speed_scale) { + *out_speed_scale = 2.f * static_cast<f32>(1 - swap_interval); + } + + swap_interval = 1; + } + + if (swap_interval >= 5) { + // As an extension, treat high swap interval as precise speed control. + if (out_speed_scale) { + *out_speed_scale = static_cast<f32>(swap_interval) / 100.f; + } + + swap_interval = 1; + } + + return swap_interval; +} + +} // namespace + +HardwareComposer::HardwareComposer() = default; +HardwareComposer::~HardwareComposer() = default; + +u32 HardwareComposer::ComposeLocked(f32* out_speed_scale, VI::Display& display, + Nvidia::Devices::nvdisp_disp0& nvdisp, u32 frame_advance) { + boost::container::small_vector<HwcLayer, 2> composition_stack; + + m_frame_number += frame_advance; + + // Release any necessary framebuffers. + for (auto& [layer_id, framebuffer] : m_framebuffers) { + if (framebuffer.release_frame_number > m_frame_number) { + // Not yet ready to release this framebuffer. + continue; + } + + if (!framebuffer.is_acquired) { + // Already released. + continue; + } + + if (auto* layer = display.FindLayer(layer_id); layer != nullptr) { + // TODO: support release fence + // This is needed to prevent screen tearing + layer->GetConsumer().ReleaseBuffer(framebuffer.item, android::Fence::NoFence()); + framebuffer.is_acquired = false; + } + } + + // Set default speed limit to 100%. + *out_speed_scale = 1.0f; + + // Determine the number of vsync periods to wait before composing again. + std::optional<s32> swap_interval{}; + bool has_acquired_buffer{}; + + // Acquire all necessary framebuffers. + for (size_t i = 0; i < display.GetNumLayers(); i++) { + auto& layer = display.GetLayer(i); + auto layer_id = layer.GetLayerId(); + + // Try to fetch the framebuffer (either new or stale). + const auto result = this->CacheFramebufferLocked(layer, layer_id); + + // If we failed, skip this layer. + if (result == CacheStatus::NoBufferAvailable) { + continue; + } + + // If we acquired a new buffer, we need to present. + if (result == CacheStatus::BufferAcquired) { + has_acquired_buffer = true; + } + + const auto& buffer = m_framebuffers[layer_id]; + const auto& item = buffer.item; + const auto& igbp_buffer = *item.graphic_buffer; + + // TODO: get proper Z-index from layer + composition_stack.emplace_back(HwcLayer{ + .buffer_handle = igbp_buffer.BufferId(), + .offset = igbp_buffer.Offset(), + .format = igbp_buffer.ExternalFormat(), + .width = igbp_buffer.Width(), + .height = igbp_buffer.Height(), + .stride = igbp_buffer.Stride(), + .z_index = 0, + .transform = static_cast<android::BufferTransformFlags>(item.transform), + .crop_rect = item.crop, + .acquire_fence = item.fence, + }); + + // We need to compose again either before this frame is supposed to + // be released, or exactly on the vsync period it should be released. + const s32 item_swap_interval = NormalizeSwapInterval(out_speed_scale, item.swap_interval); + + // TODO: handle cases where swap intervals are relatively prime. So far, + // only swap intervals of 0, 1 and 2 have been observed, but if 3 were + // to be introduced, this would cause an issue. + if (swap_interval) { + swap_interval = std::min(*swap_interval, item_swap_interval); + } else { + swap_interval = item_swap_interval; + } + } + + // If any new buffers were acquired, we can present. + if (has_acquired_buffer) { + // Sort by Z-index. + std::stable_sort(composition_stack.begin(), composition_stack.end(), + [&](auto& l, auto& r) { return l.z_index < r.z_index; }); + + // Composite. + nvdisp.Composite(composition_stack); + } + + // Render MicroProfile. + MicroProfileFlip(); + + // Advance by at least one frame. + return swap_interval.value_or(1); +} + +void HardwareComposer::RemoveLayerLocked(VI::Display& display, LayerId layer_id) { + // Check if we are tracking a slot with this layer_id. + const auto it = m_framebuffers.find(layer_id); + if (it == m_framebuffers.end()) { + return; + } + + // Try to release the buffer item. + auto* const layer = display.FindLayer(layer_id); + if (layer && it->second.is_acquired) { + layer->GetConsumer().ReleaseBuffer(it->second.item, android::Fence::NoFence()); + } + + // Erase the slot. + m_framebuffers.erase(it); +} + +bool HardwareComposer::TryAcquireFramebufferLocked(VI::Layer& layer, Framebuffer& framebuffer) { + // Attempt the update. + const auto status = layer.GetConsumer().AcquireBuffer(&framebuffer.item, {}, false); + if (status != android::Status::NoError) { + return false; + } + + // We succeeded, so set the new release frame info. + framebuffer.release_frame_number = + NormalizeSwapInterval(nullptr, framebuffer.item.swap_interval); + framebuffer.is_acquired = true; + + return true; +} + +HardwareComposer::CacheStatus HardwareComposer::CacheFramebufferLocked(VI::Layer& layer, + LayerId layer_id) { + // Check if this framebuffer is already present. + const auto it = m_framebuffers.find(layer_id); + if (it != m_framebuffers.end()) { + // If it's currently still acquired, we are done. + if (it->second.is_acquired) { + return CacheStatus::CachedBufferReused; + } + + // Try to acquire a new item. + if (this->TryAcquireFramebufferLocked(layer, it->second)) { + // We got a new item. + return CacheStatus::BufferAcquired; + } else { + // We didn't acquire a new item, but we can reuse the slot. + return CacheStatus::CachedBufferReused; + } + } + + // Framebuffer is not present, so try to create it. + Framebuffer framebuffer{}; + + if (this->TryAcquireFramebufferLocked(layer, framebuffer)) { + // Move the buffer item into a new slot. + m_framebuffers.emplace(layer_id, std::move(framebuffer)); + + // We succeeded. + return CacheStatus::BufferAcquired; + } + + // We couldn't acquire the buffer item, so don't create a slot. + return CacheStatus::NoBufferAvailable; +} + +} // namespace Service::Nvnflinger diff --git a/src/core/hle/service/nvnflinger/hardware_composer.h b/src/core/hle/service/nvnflinger/hardware_composer.h new file mode 100644 index 000000000..ddab94ac9 --- /dev/null +++ b/src/core/hle/service/nvnflinger/hardware_composer.h @@ -0,0 +1,59 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include <memory> +#include <boost/container/flat_map.hpp> + +#include "core/hle/service/nvnflinger/buffer_item.h" + +namespace Service::Nvidia::Devices { +class nvdisp_disp0; +} + +namespace Service::VI { +class Display; +class Layer; +} // namespace Service::VI + +namespace Service::Nvnflinger { + +using LayerId = u64; + +class HardwareComposer { +public: + explicit HardwareComposer(); + ~HardwareComposer(); + + u32 ComposeLocked(f32* out_speed_scale, VI::Display& display, + Nvidia::Devices::nvdisp_disp0& nvdisp, u32 frame_advance); + void RemoveLayerLocked(VI::Display& display, LayerId layer_id); + +private: + // TODO: do we want to track frame number in vi instead? + u64 m_frame_number{0}; + +private: + using ReleaseFrameNumber = u64; + + struct Framebuffer { + android::BufferItem item{}; + ReleaseFrameNumber release_frame_number{}; + bool is_acquired{false}; + }; + + enum class CacheStatus : u32 { + NoBufferAvailable, + BufferAcquired, + CachedBufferReused, + }; + + boost::container::flat_map<LayerId, Framebuffer> m_framebuffers{}; + +private: + bool TryAcquireFramebufferLocked(VI::Layer& layer, Framebuffer& framebuffer); + CacheStatus CacheFramebufferLocked(VI::Layer& layer, LayerId layer_id); +}; + +} // namespace Service::Nvnflinger diff --git a/src/core/hle/service/nvnflinger/hwc_layer.h b/src/core/hle/service/nvnflinger/hwc_layer.h new file mode 100644 index 000000000..3af668a25 --- /dev/null +++ b/src/core/hle/service/nvnflinger/hwc_layer.h @@ -0,0 +1,27 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +#pragma once + +#include "common/math_util.h" +#include "core/hle/service/nvdrv/nvdata.h" +#include "core/hle/service/nvnflinger/buffer_transform_flags.h" +#include "core/hle/service/nvnflinger/pixel_format.h" +#include "core/hle/service/nvnflinger/ui/fence.h" + +namespace Service::Nvnflinger { + +struct HwcLayer { + u32 buffer_handle; + u32 offset; + android::PixelFormat format; + u32 width; + u32 height; + u32 stride; + s32 z_index; + android::BufferTransformFlags transform; + Common::Rectangle<int> crop_rect; + android::Fence acquire_fence; +}; + +} // namespace Service::Nvnflinger diff --git a/src/core/hle/service/nvnflinger/nvnflinger.cpp b/src/core/hle/service/nvnflinger/nvnflinger.cpp index 51133853c..a4e848882 100644 --- a/src/core/hle/service/nvnflinger/nvnflinger.cpp +++ b/src/core/hle/service/nvnflinger/nvnflinger.cpp @@ -18,6 +18,7 @@ #include "core/hle/service/nvnflinger/buffer_item_consumer.h" #include "core/hle/service/nvnflinger/buffer_queue_core.h" #include "core/hle/service/nvnflinger/fb_share_buffer_manager.h" +#include "core/hle/service/nvnflinger/hardware_composer.h" #include "core/hle/service/nvnflinger/hos_binder_driver_server.h" #include "core/hle/service/nvnflinger/nvnflinger.h" #include "core/hle/service/nvnflinger/ui/graphic_buffer.h" @@ -279,45 +280,19 @@ void Nvnflinger::Compose() { SCOPE_EXIT({ display.SignalVSyncEvent(); }); // Don't do anything for displays without layers. - if (!display.HasLayers()) - continue; - - // TODO(Subv): Support more than 1 layer. - VI::Layer& layer = display.GetLayer(0); - - android::BufferItem buffer{}; - const auto status = layer.GetConsumer().AcquireBuffer(&buffer, {}, false); - - if (status != android::Status::NoError) { + if (!display.HasLayers()) { continue; } - const auto& igbp_buffer = *buffer.graphic_buffer; - if (!system.IsPoweredOn()) { return; // We are likely shutting down } - // Now send the buffer to the GPU for drawing. - // TODO(Subv): Support more than just disp0. The display device selection is probably based - // on which display we're drawing (Default, Internal, External, etc) auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>(disp_fd); ASSERT(nvdisp); - Common::Rectangle<int> crop_rect{ - static_cast<int>(buffer.crop.Left()), static_cast<int>(buffer.crop.Top()), - static_cast<int>(buffer.crop.Right()), static_cast<int>(buffer.crop.Bottom())}; - - nvdisp->flip(igbp_buffer.BufferId(), igbp_buffer.Offset(), igbp_buffer.ExternalFormat(), - igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(), - static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect, - buffer.fence.fences, buffer.fence.num_fences); - - MicroProfileFlip(); - - swap_interval = buffer.swap_interval; - - layer.GetConsumer().ReleaseBuffer(buffer, android::Fence::NoFence()); + swap_interval = display.GetComposer().ComposeLocked(&compose_speed_scale, display, *nvdisp, + swap_interval); } } @@ -334,15 +309,16 @@ s64 Nvnflinger::GetNextTicks() const { speed_scale = 0.01f; } } + + // Adjust by speed limit determined during composition. + speed_scale /= compose_speed_scale; + if (system.GetNVDECActive() && settings.use_video_framerate.GetValue()) { // Run at intended presentation rate during video playback. speed_scale = 1.f; } - // As an extension, treat nonpositive swap interval as framerate multiplier. - const f32 effective_fps = swap_interval <= 0 ? 120.f * static_cast<f32>(1 - swap_interval) - : 60.f / static_cast<f32>(swap_interval); - + const f32 effective_fps = 60.f / static_cast<f32>(swap_interval); return static_cast<s64>(speed_scale * (1000000000.f / effective_fps)); } diff --git a/src/core/hle/service/nvnflinger/nvnflinger.h b/src/core/hle/service/nvnflinger/nvnflinger.h index 369439142..c984d55a0 100644 --- a/src/core/hle/service/nvnflinger/nvnflinger.h +++ b/src/core/hle/service/nvnflinger/nvnflinger.h @@ -46,6 +46,7 @@ class BufferQueueProducer; namespace Service::Nvnflinger { class FbShareBufferManager; +class HardwareComposer; class HosBinderDriverServer; class Nvnflinger final { @@ -143,6 +144,7 @@ private: u32 next_buffer_queue_id = 1; s32 swap_interval = 1; + f32 compose_speed_scale = 1.0f; bool is_abandoned = false; diff --git a/src/core/hle/service/vi/display/vi_display.cpp b/src/core/hle/service/vi/display/vi_display.cpp index dab1905cc..7f2af9acc 100644 --- a/src/core/hle/service/vi/display/vi_display.cpp +++ b/src/core/hle/service/vi/display/vi_display.cpp @@ -16,6 +16,7 @@ #include "core/hle/service/nvnflinger/buffer_queue_consumer.h" #include "core/hle/service/nvnflinger/buffer_queue_core.h" #include "core/hle/service/nvnflinger/buffer_queue_producer.h" +#include "core/hle/service/nvnflinger/hardware_composer.h" #include "core/hle/service/nvnflinger/hos_binder_driver_server.h" #include "core/hle/service/vi/display/vi_display.h" #include "core/hle/service/vi/layer/vi_layer.h" @@ -43,6 +44,7 @@ Display::Display(u64 id, std::string name_, KernelHelpers::ServiceContext& service_context_, Core::System& system_) : display_id{id}, name{std::move(name_)}, hos_binder_driver_server{hos_binder_driver_server_}, service_context{service_context_} { + hardware_composer = std::make_unique<Nvnflinger::HardwareComposer>(); vsync_event = service_context.CreateEvent(fmt::format("Display VSync Event {}", id)); } @@ -81,8 +83,6 @@ void Display::SignalVSyncEvent() { void Display::CreateLayer(u64 layer_id, u32 binder_id, Service::Nvidia::NvCore::Container& nv_core) { - ASSERT_MSG(layers.empty(), "Only one layer is supported per display at the moment"); - auto [core, producer, consumer] = CreateBufferQueue(service_context, nv_core.GetNvMapFile()); auto buffer_item_consumer = std::make_shared<android::BufferItemConsumer>(std::move(consumer)); diff --git a/src/core/hle/service/vi/display/vi_display.h b/src/core/hle/service/vi/display/vi_display.h index 8eb8a5155..220292cff 100644 --- a/src/core/hle/service/vi/display/vi_display.h +++ b/src/core/hle/service/vi/display/vi_display.h @@ -11,9 +11,14 @@ #include "common/common_types.h" #include "core/hle/result.h" +namespace Core { +class System; +} + namespace Kernel { class KEvent; -} +class KReadableEvent; +} // namespace Kernel namespace Service::android { class BufferQueueProducer; @@ -24,8 +29,9 @@ class ServiceContext; } namespace Service::Nvnflinger { +class HardwareComposer; class HosBinderDriverServer; -} +} // namespace Service::Nvnflinger namespace Service::Nvidia::NvCore { class Container; @@ -118,6 +124,10 @@ public: /// const Layer* FindLayer(u64 layer_id) const; + Nvnflinger::HardwareComposer& GetComposer() const { + return *hardware_composer; + } + private: u64 display_id; std::string name; @@ -125,6 +135,7 @@ private: KernelHelpers::ServiceContext& service_context; std::vector<std::unique_ptr<Layer>> layers; + std::unique_ptr<Nvnflinger::HardwareComposer> hardware_composer; Kernel::KEvent* vsync_event{}; bool is_abandoned{}; }; diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index 73058db9a..d508ed28c 100644 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp @@ -195,8 +195,9 @@ private: void GetSharedBufferMemoryHandleId(HLERequestContext& ctx) { IPC::RequestParser rp{ctx}; const u64 buffer_id = rp.PopRaw<u64>(); + const u64 aruid = ctx.GetPID(); - LOG_INFO(Service_VI, "called. buffer_id={:#x}", buffer_id); + LOG_INFO(Service_VI, "called. buffer_id={:#x}, aruid={:#x}", buffer_id, aruid); struct OutputParameters { s32 nvmap_handle; @@ -206,7 +207,7 @@ private: OutputParameters out{}; Nvnflinger::SharedMemoryPoolLayout layout{}; const auto result = nvnflinger.GetSystemBufferManager().GetSharedBufferMemoryHandleId( - &out.size, &out.nvmap_handle, &layout, buffer_id, 0); + &out.size, &out.nvmap_handle, &layout, buffer_id, aruid); ctx.WriteBuffer(&layout, sizeof(layout)); diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 0755ba772..16c905db9 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -55,6 +55,7 @@ add_library(video_core STATIC engines/maxwell_dma.h engines/puller.cpp engines/puller.h + framebuffer_config.cpp framebuffer_config.h fsr.cpp fsr.h @@ -115,8 +116,24 @@ add_library(video_core STATIC renderer_null/null_rasterizer.h renderer_null/renderer_null.cpp renderer_null/renderer_null.h + renderer_opengl/present/filters.cpp + renderer_opengl/present/filters.h + renderer_opengl/present/fsr.cpp + renderer_opengl/present/fsr.h + renderer_opengl/present/fxaa.cpp + renderer_opengl/present/fxaa.h + renderer_opengl/present/layer.cpp + renderer_opengl/present/layer.h + renderer_opengl/present/present_uniforms.h + renderer_opengl/present/smaa.cpp + renderer_opengl/present/smaa.h + renderer_opengl/present/util.h + renderer_opengl/present/window_adapt_pass.cpp + renderer_opengl/present/window_adapt_pass.h renderer_opengl/blit_image.cpp renderer_opengl/blit_image.h + renderer_opengl/gl_blit_screen.cpp + renderer_opengl/gl_blit_screen.h renderer_opengl/gl_buffer_cache_base.cpp renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.h @@ -126,8 +143,6 @@ add_library(video_core STATIC renderer_opengl/gl_device.h renderer_opengl/gl_fence_manager.cpp renderer_opengl/gl_fence_manager.h - renderer_opengl/gl_fsr.cpp - renderer_opengl/gl_fsr.h renderer_opengl/gl_graphics_pipeline.cpp renderer_opengl/gl_graphics_pipeline.h renderer_opengl/gl_rasterizer.cpp @@ -155,6 +170,22 @@ add_library(video_core STATIC renderer_opengl/renderer_opengl.h renderer_opengl/util_shaders.cpp renderer_opengl/util_shaders.h + renderer_vulkan/present/anti_alias_pass.h + renderer_vulkan/present/filters.cpp + renderer_vulkan/present/filters.h + renderer_vulkan/present/fsr.cpp + renderer_vulkan/present/fsr.h + renderer_vulkan/present/fxaa.cpp + renderer_vulkan/present/fxaa.h + renderer_vulkan/present/layer.cpp + renderer_vulkan/present/layer.h + renderer_vulkan/present/present_push_constants.h + renderer_vulkan/present/smaa.cpp + renderer_vulkan/present/smaa.h + renderer_vulkan/present/util.cpp + renderer_vulkan/present/util.h + renderer_vulkan/present/window_adapt_pass.cpp + renderer_vulkan/present/window_adapt_pass.h renderer_vulkan/blit_image.cpp renderer_vulkan/blit_image.h renderer_vulkan/fixed_pipeline_state.cpp @@ -181,8 +212,6 @@ add_library(video_core STATIC renderer_vulkan/vk_descriptor_pool.h renderer_vulkan/vk_fence_manager.cpp renderer_vulkan/vk_fence_manager.h - renderer_vulkan/vk_fsr.cpp - renderer_vulkan/vk_fsr.h renderer_vulkan/vk_graphics_pipeline.cpp renderer_vulkan/vk_graphics_pipeline.h renderer_vulkan/vk_master_semaphore.cpp @@ -203,8 +232,6 @@ add_library(video_core STATIC renderer_vulkan/vk_scheduler.h renderer_vulkan/vk_shader_util.cpp renderer_vulkan/vk_shader_util.h - renderer_vulkan/vk_smaa.cpp - renderer_vulkan/vk_smaa.h renderer_vulkan/vk_staging_buffer_pool.cpp renderer_vulkan/vk_staging_buffer_pool.h renderer_vulkan/vk_state_tracker.cpp diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h index 1a43e24b6..99341e431 100644 --- a/src/video_core/engines/maxwell_dma.h +++ b/src/video_core/engines/maxwell_dma.h @@ -8,6 +8,7 @@ #include <vector> #include "common/bit_field.h" +#include "common/common_funcs.h" #include "common/common_types.h" #include "common/scratch_buffer.h" #include "video_core/engines/engine_interface.h" diff --git a/src/video_core/framebuffer_config.cpp b/src/video_core/framebuffer_config.cpp new file mode 100644 index 000000000..e28d41f84 --- /dev/null +++ b/src/video_core/framebuffer_config.cpp @@ -0,0 +1,55 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/assert.h" +#include "video_core/framebuffer_config.h" + +namespace Tegra { + +Common::Rectangle<f32> NormalizeCrop(const FramebufferConfig& framebuffer, u32 texture_width, + u32 texture_height) { + f32 left, top, right, bottom; + + if (!framebuffer.crop_rect.IsEmpty()) { + // If crop rectangle is not empty, apply properties from rectangle. + left = static_cast<f32>(framebuffer.crop_rect.left); + top = static_cast<f32>(framebuffer.crop_rect.top); + right = static_cast<f32>(framebuffer.crop_rect.right); + bottom = static_cast<f32>(framebuffer.crop_rect.bottom); + } else { + // Otherwise, fall back to framebuffer dimensions. + left = 0; + top = 0; + right = static_cast<f32>(framebuffer.width); + bottom = static_cast<f32>(framebuffer.height); + } + + // Apply transformation flags. + auto framebuffer_transform_flags = framebuffer.transform_flags; + + if (True(framebuffer_transform_flags & Service::android::BufferTransformFlags::FlipH)) { + // Switch left and right. + std::swap(left, right); + } + if (True(framebuffer_transform_flags & Service::android::BufferTransformFlags::FlipV)) { + // Switch top and bottom. + std::swap(top, bottom); + } + + framebuffer_transform_flags &= ~Service::android::BufferTransformFlags::FlipH; + framebuffer_transform_flags &= ~Service::android::BufferTransformFlags::FlipV; + if (True(framebuffer_transform_flags)) { + UNIMPLEMENTED_MSG("Unsupported framebuffer_transform_flags={}", + static_cast<u32>(framebuffer_transform_flags)); + } + + // Normalize coordinate space. + left /= static_cast<f32>(texture_width); + top /= static_cast<f32>(texture_height); + right /= static_cast<f32>(texture_width); + bottom /= static_cast<f32>(texture_height); + + return Common::Rectangle<f32>(left, top, right, bottom); +} + +} // namespace Tegra diff --git a/src/video_core/framebuffer_config.h b/src/video_core/framebuffer_config.h index 856f4bd52..6a18b76fb 100644 --- a/src/video_core/framebuffer_config.h +++ b/src/video_core/framebuffer_config.h @@ -7,6 +7,7 @@ #include "common/math_util.h" #include "core/hle/service/nvnflinger/buffer_transform_flags.h" #include "core/hle/service/nvnflinger/pixel_format.h" +#include "core/hle/service/nvnflinger/ui/fence.h" namespace Tegra { @@ -21,7 +22,10 @@ struct FramebufferConfig { u32 stride{}; Service::android::PixelFormat pixel_format{}; Service::android::BufferTransformFlags transform_flags{}; - Common::Rectangle<int> crop_rect; + Common::Rectangle<int> crop_rect{}; }; +Common::Rectangle<f32> NormalizeCrop(const FramebufferConfig& framebuffer, u32 texture_width, + u32 texture_height); + } // namespace Tegra diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 609704b33..f4a5d831c 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -274,11 +274,6 @@ struct GPU::Impl { } } - /// Swap buffers (render frame) - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - gpu_thread.SwapBuffers(framebuffer); - } - /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory void FlushRegion(DAddr addr, u64 size) { gpu_thread.FlushRegion(addr, size); @@ -313,8 +308,9 @@ struct GPU::Impl { gpu_thread.FlushAndInvalidateRegion(addr, size); } - void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, - std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) { + void RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers, + std::vector<Service::Nvidia::NvFence>&& fences) { + size_t num_fences{fences.size()}; size_t current_request_counter{}; { std::unique_lock<std::mutex> lk(request_swap_mutex); @@ -328,13 +324,12 @@ struct GPU::Impl { } } const auto wait_fence = - RequestSyncOperation([this, current_request_counter, framebuffer, fences, num_fences] { + RequestSyncOperation([this, current_request_counter, &layers, &fences, num_fences] { auto& syncpoint_manager = host1x.GetSyncpointManager(); if (num_fences == 0) { - renderer->SwapBuffers(framebuffer); + renderer->Composite(layers); } - const auto executer = [this, current_request_counter, - framebuffer_copy = *framebuffer]() { + const auto executer = [this, current_request_counter, layers_copy = layers]() { { std::unique_lock<std::mutex> lk(request_swap_mutex); if (--request_swap_counters[current_request_counter] != 0) { @@ -342,7 +337,7 @@ struct GPU::Impl { } free_swap_counters.push_back(current_request_counter); } - renderer->SwapBuffers(&framebuffer_copy); + renderer->Composite(layers_copy); }; for (size_t i = 0; i < num_fences; i++) { syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer); @@ -505,9 +500,9 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const { return impl->ShaderNotify(); } -void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, - std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) { - impl->RequestSwapBuffers(framebuffer, fences, num_fences); +void GPU::RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers, + std::vector<Service::Nvidia::NvFence>&& fences) { + impl->RequestComposite(std::move(layers), std::move(fences)); } u64 GPU::GetTicks() const { @@ -554,10 +549,6 @@ void GPU::ClearCdmaInstance(u32 id) { impl->ClearCdmaInstance(id); } -void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - impl->SwapBuffers(framebuffer); -} - VideoCore::RasterizerDownloadArea GPU::OnCPURead(PAddr addr, u64 size) { return impl->OnCPURead(addr, size); } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b3c1d15bd..c4602ca37 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -212,8 +212,8 @@ public: void RendererFrameEndNotify(); - void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, - std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences); + void RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers, + std::vector<Service::Nvidia::NvFence>&& fences); /// Performs any additional setup necessary in order to begin GPU emulation. /// This can be used to launch any necessary threads and register any necessary diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 788d4f61e..58d8110b8 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -40,8 +40,6 @@ static void RunThread(std::stop_token stop_token, Core::System& system, } if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) { scheduler.Push(submit_list->channel, std::move(submit_list->entries)); - } else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) { - renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); } else if (std::holds_alternative<GPUTickCommand>(next.data)) { system.GPU().TickWork(); } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) { @@ -78,10 +76,6 @@ void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) { PushCommand(SubmitListCommand(channel, std::move(entries))); } -void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - PushCommand(SwapBuffersCommand(framebuffer ? std::make_optional(*framebuffer) : std::nullopt)); -} - void ThreadManager::FlushRegion(DAddr addr, u64 size) { if (!is_async) { // Always flush with synchronous GPU mode diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 2de25e9ef..dc0fce9f8 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -44,14 +44,6 @@ struct SubmitListCommand final { Tegra::CommandList entries; }; -/// Command to signal to the GPU thread that a swap buffers is pending -struct SwapBuffersCommand final { - explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer_) - : framebuffer{std::move(framebuffer_)} {} - - std::optional<Tegra::FramebufferConfig> framebuffer; -}; - /// Command to signal to the GPU thread to flush a region struct FlushRegionCommand final { explicit constexpr FlushRegionCommand(DAddr addr_, u64 size_) : addr{addr_}, size{size_} {} @@ -81,8 +73,8 @@ struct FlushAndInvalidateRegionCommand final { struct GPUTickCommand final {}; using CommandData = - std::variant<std::monostate, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, - InvalidateRegionCommand, FlushAndInvalidateRegionCommand, GPUTickCommand>; + std::variant<std::monostate, SubmitListCommand, FlushRegionCommand, InvalidateRegionCommand, + FlushAndInvalidateRegionCommand, GPUTickCommand>; struct CommandDataContainer { CommandDataContainer() = default; @@ -118,9 +110,6 @@ public: /// Push GPU command entries to be processed void SubmitList(s32 channel, Tegra::CommandList&& entries); - /// Swap buffers (render frame) - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); - /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory void FlushRegion(DAddr addr, u64 size); diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index cd2549232..969f21d50 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -9,7 +9,7 @@ set(FIDELITYFX_FILES ) set(GLSL_INCLUDES - fidelityfx_fsr.comp + fidelityfx_fsr.frag ${FIDELITYFX_FILES} ) @@ -56,10 +56,11 @@ set(SHADER_FILES vulkan_color_clear.frag vulkan_color_clear.vert vulkan_depthstencil_clear.frag - vulkan_fidelityfx_fsr_easu_fp16.comp - vulkan_fidelityfx_fsr_easu_fp32.comp - vulkan_fidelityfx_fsr_rcas_fp16.comp - vulkan_fidelityfx_fsr_rcas_fp32.comp + vulkan_fidelityfx_fsr.vert + vulkan_fidelityfx_fsr_easu_fp16.frag + vulkan_fidelityfx_fsr_easu_fp32.frag + vulkan_fidelityfx_fsr_rcas_fp16.frag + vulkan_fidelityfx_fsr_rcas_fp32.frag vulkan_present.frag vulkan_present.vert vulkan_present_scaleforce_fp16.frag diff --git a/src/video_core/host_shaders/fidelityfx_fsr.comp b/src/video_core/host_shaders/fidelityfx_fsr.frag index f91b1aa9f..a266e1c4e 100644 --- a/src/video_core/host_shaders/fidelityfx_fsr.comp +++ b/src/video_core/host_shaders/fidelityfx_fsr.frag @@ -34,7 +34,6 @@ layout( push_constant ) uniform constants { }; layout(set=0,binding=0) uniform sampler2D InputTexture; -layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; #define A_GPU 1 #define A_GLSL 1 @@ -72,44 +71,40 @@ layout(set=0,binding=1,rgba16f) uniform image2D OutputTexture; #include "ffx_fsr1.h" -void CurrFilter(AU2 pos) { -#if USE_BILINEAR - AF2 pp = (AF2(pos) * AF2_AU2(Const0.xy) + AF2_AU2(Const0.zw)) * AF2_AU2(Const1.xy) + AF2(0.5, -0.5) * AF2_AU2(Const1.zw); - imageStore(OutputTexture, ASU2(pos), textureLod(InputTexture, pp, 0.0)); +#if USE_RCAS + layout(location = 0) in vec2 frag_texcoord; #endif +layout (location = 0) out vec4 frag_color; + +void CurrFilter(AU2 pos) { #if USE_EASU #ifndef YUZU_USE_FP16 AF3 c; FsrEasuF(c, pos, Const0, Const1, Const2, Const3); - imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); + frag_color = AF4(c, 1.0); #else AH3 c; FsrEasuH(c, pos, Const0, Const1, Const2, Const3); - imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); + frag_color = AH4(c, 1.0); #endif #endif #if USE_RCAS #ifndef YUZU_USE_FP16 AF3 c; FsrRcasF(c.r, c.g, c.b, pos, Const0); - imageStore(OutputTexture, ASU2(pos), AF4(c, 1)); + frag_color = AF4(c, 1.0); #else AH3 c; FsrRcasH(c.r, c.g, c.b, pos, Const0); - imageStore(OutputTexture, ASU2(pos), AH4(c, 1)); + frag_color = AH4(c, 1.0); #endif #endif } -layout(local_size_x=64) in; void main() { - // Do remapping of local xy in workgroup for a more PS-like swizzle pattern. - AU2 gxy = ARmp8x8(gl_LocalInvocationID.x) + AU2(gl_WorkGroupID.x << 4u, gl_WorkGroupID.y << 4u); - CurrFilter(gxy); - gxy.x += 8u; - CurrFilter(gxy); - gxy.y += 8u; - CurrFilter(gxy); - gxy.x -= 8u; - CurrFilter(gxy); +#if USE_RCAS + CurrFilter(AU2(frag_texcoord * vec2(textureSize(InputTexture, 0)))); +#else + CurrFilter(AU2(gl_FragCoord.xy)); +#endif } diff --git a/src/video_core/host_shaders/fxaa.vert b/src/video_core/host_shaders/fxaa.vert index c2717d90d..223ab785e 100644 --- a/src/video_core/host_shaders/fxaa.vert +++ b/src/video_core/host_shaders/fxaa.vert @@ -7,8 +7,8 @@ out gl_PerVertex { vec4 gl_Position; }; -const vec2 vertices[4] = - vec2[4](vec2(-1.0, 1.0), vec2(1.0, 1.0), vec2(-1.0, -1.0), vec2(1.0, -1.0)); +const vec2 vertices[3] = + vec2[3](vec2(-1,-1), vec2(3,-1), vec2(-1, 3)); layout (location = 0) out vec4 posPos; diff --git a/src/video_core/host_shaders/opengl_present_scaleforce.frag b/src/video_core/host_shaders/opengl_present_scaleforce.frag index a780373e3..1598575a1 100644 --- a/src/video_core/host_shaders/opengl_present_scaleforce.frag +++ b/src/video_core/host_shaders/opengl_present_scaleforce.frag @@ -26,21 +26,11 @@ #endif -#ifdef VULKAN - -#define BINDING_COLOR_TEXTURE 1 - -#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv - -#define BINDING_COLOR_TEXTURE 0 - -#endif - layout (location = 0) in vec2 tex_coord; layout (location = 0) out vec4 frag_color; -layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D input_texture; +layout (binding = 0) uniform sampler2D input_texture; const bool ignore_alpha = true; diff --git a/src/video_core/host_shaders/present_bicubic.frag b/src/video_core/host_shaders/present_bicubic.frag index c57dd2851..c814629cf 100644 --- a/src/video_core/host_shaders/present_bicubic.frag +++ b/src/video_core/host_shaders/present_bicubic.frag @@ -3,22 +3,12 @@ #version 460 core -#ifdef VULKAN - -#define BINDING_COLOR_TEXTURE 1 - -#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv - -#define BINDING_COLOR_TEXTURE 0 - -#endif - layout (location = 0) in vec2 frag_tex_coord; layout (location = 0) out vec4 color; -layout (binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture; +layout (binding = 0) uniform sampler2D color_texture; vec4 cubic(float v) { vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - v; diff --git a/src/video_core/host_shaders/present_gaussian.frag b/src/video_core/host_shaders/present_gaussian.frag index 5f54b71b6..ad9bb76a4 100644 --- a/src/video_core/host_shaders/present_gaussian.frag +++ b/src/video_core/host_shaders/present_gaussian.frag @@ -7,21 +7,11 @@ #version 460 core -#ifdef VULKAN - -#define BINDING_COLOR_TEXTURE 1 - -#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv - -#define BINDING_COLOR_TEXTURE 0 - -#endif - layout(location = 0) in vec2 frag_tex_coord; layout(location = 0) out vec4 color; -layout(binding = BINDING_COLOR_TEXTURE) uniform sampler2D color_texture; +layout(binding = 0) uniform sampler2D color_texture; const float offset[3] = float[](0.0, 1.3846153846, 3.2307692308); const float weight[3] = float[](0.2270270270, 0.3162162162, 0.0702702703); diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr.vert b/src/video_core/host_shaders/vulkan_fidelityfx_fsr.vert new file mode 100644 index 000000000..6a87a7cac --- /dev/null +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr.vert @@ -0,0 +1,13 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 450 + +layout(location = 0) out vec2 texcoord; + +void main() { + float x = float((gl_VertexIndex & 1) << 2); + float y = float((gl_VertexIndex & 2) << 1); + gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0); + texcoord = vec2(x, y) / 2.0; +} diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.frag index 00af13726..d369bef06 100644 --- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.comp +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16.frag @@ -7,4 +7,4 @@ #define YUZU_USE_FP16 #define USE_EASU 1 -#include "fidelityfx_fsr.comp" +#include "fidelityfx_fsr.frag" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.frag index 13d783fa8..6f25ef00f 100644 --- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.comp +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32.frag @@ -6,4 +6,4 @@ #define USE_EASU 1 -#include "fidelityfx_fsr.comp" +#include "fidelityfx_fsr.frag" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.frag index 331549d96..0c953a900 100644 --- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.comp +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16.frag @@ -7,4 +7,4 @@ #define YUZU_USE_FP16 #define USE_RCAS 1 -#include "fidelityfx_fsr.comp" +#include "fidelityfx_fsr.frag" diff --git a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.frag index 013ca0014..02e9a27c6 100644 --- a/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.comp +++ b/src/video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32.frag @@ -6,4 +6,4 @@ #define USE_RCAS 1 -#include "fidelityfx_fsr.comp" +#include "fidelityfx_fsr.frag" diff --git a/src/video_core/host_shaders/vulkan_present.frag b/src/video_core/host_shaders/vulkan_present.frag index 97e098ced..adada9411 100644 --- a/src/video_core/host_shaders/vulkan_present.frag +++ b/src/video_core/host_shaders/vulkan_present.frag @@ -7,7 +7,7 @@ layout (location = 0) in vec2 frag_tex_coord; layout (location = 0) out vec4 color; -layout (binding = 1) uniform sampler2D color_texture; +layout (binding = 0) uniform sampler2D color_texture; void main() { color = texture(color_texture, frag_tex_coord); diff --git a/src/video_core/host_shaders/vulkan_present.vert b/src/video_core/host_shaders/vulkan_present.vert index 89dc80468..249c9675a 100644 --- a/src/video_core/host_shaders/vulkan_present.vert +++ b/src/video_core/host_shaders/vulkan_present.vert @@ -3,16 +3,37 @@ #version 460 core -layout (location = 0) in vec2 vert_position; -layout (location = 1) in vec2 vert_tex_coord; - layout (location = 0) out vec2 frag_tex_coord; -layout (set = 0, binding = 0) uniform MatrixBlock { +struct ScreenRectVertex { + vec2 position; + vec2 tex_coord; +}; + +layout (push_constant) uniform PushConstants { mat4 modelview_matrix; + ScreenRectVertex vertices[4]; }; +// Vulkan spec 15.8.1: +// Any member of a push constant block that is declared as an +// array must only be accessed with dynamically uniform indices. +ScreenRectVertex GetVertex(int index) { + switch (index) { + case 0: + default: + return vertices[0]; + case 1: + return vertices[1]; + case 2: + return vertices[2]; + case 3: + return vertices[3]; + } +} + void main() { - gl_Position = modelview_matrix * vec4(vert_position, 0.0, 1.0); - frag_tex_coord = vert_tex_coord; + ScreenRectVertex vertex = GetVertex(gl_VertexIndex); + gl_Position = modelview_matrix * vec4(vertex.position, 0.0, 1.0); + frag_tex_coord = vertex.tex_coord; } diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag index 3dc9c0df5..79ea817c2 100644 --- a/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag +++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp16.frag @@ -5,6 +5,7 @@ #extension GL_GOOGLE_include_directive : enable +#define VERSION 1 #define YUZU_USE_FP16 #include "opengl_present_scaleforce.frag" diff --git a/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag index 77ed07552..9605bb58b 100644 --- a/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag +++ b/src/video_core/host_shaders/vulkan_present_scaleforce_fp32.frag @@ -5,4 +5,6 @@ #extension GL_GOOGLE_include_directive : enable +#define VERSION 1 + #include "opengl_present_scaleforce.frag" diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 8fa4e4d9a..6e2eccfbf 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -155,12 +155,6 @@ public: virtual void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, std::span<const u8> memory) = 0; - /// Attempt to use a faster method to display the framebuffer to screen - [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, - DAddr framebuffer_addr, u32 pixel_stride) { - return false; - } - /// Initialize disk cached resources for the game being emulated virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading, const DiskResourceLoadCallback& callback) {} diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 78ea5208b..3ad180f67 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -38,7 +38,7 @@ public: virtual ~RendererBase(); /// Finalize rendering the guest frame and draw into the presentation texture - virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0; + virtual void Composite(std::span<const Tegra::FramebufferConfig> layers) = 0; [[nodiscard]] virtual RasterizerInterface* ReadRasterizer() = 0; diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp index abfabb65b..a5cda0f38 100644 --- a/src/video_core/renderer_null/null_rasterizer.cpp +++ b/src/video_core/renderer_null/null_rasterizer.cpp @@ -92,10 +92,6 @@ bool RasterizerNull::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surfac } void RasterizerNull::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, std::span<const u8> memory) {} -bool RasterizerNull::AccelerateDisplay(const Tegra::FramebufferConfig& config, - DAddr framebuffer_addr, u32 pixel_stride) { - return true; -} void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading, const VideoCore::DiskResourceLoadCallback& callback) {} void RasterizerNull::InitializeChannel(Tegra::Control::ChannelState& channel) { diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h index a5789604f..c7f5849c7 100644 --- a/src/video_core/renderer_null/null_rasterizer.h +++ b/src/video_core/renderer_null/null_rasterizer.h @@ -77,8 +77,6 @@ public: Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, std::span<const u8> memory) override; - bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, - u32 pixel_stride) override; void LoadDiskResources(u64 title_id, std::stop_token stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; void InitializeChannel(Tegra::Control::ChannelState& channel) override; diff --git a/src/video_core/renderer_null/renderer_null.cpp b/src/video_core/renderer_null/renderer_null.cpp index 078feb925..c89daff53 100644 --- a/src/video_core/renderer_null/renderer_null.cpp +++ b/src/video_core/renderer_null/renderer_null.cpp @@ -13,8 +13,8 @@ RendererNull::RendererNull(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gp RendererNull::~RendererNull() = default; -void RendererNull::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - if (!framebuffer) { +void RendererNull::Composite(std::span<const Tegra::FramebufferConfig> framebuffers) { + if (framebuffers.empty()) { return; } diff --git a/src/video_core/renderer_null/renderer_null.h b/src/video_core/renderer_null/renderer_null.h index 9531b43f6..063b476bb 100644 --- a/src/video_core/renderer_null/renderer_null.h +++ b/src/video_core/renderer_null/renderer_null.h @@ -17,7 +17,7 @@ public: std::unique_ptr<Core::Frontend::GraphicsContext> context); ~RendererNull() override; - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; + void Composite(std::span<const Tegra::FramebufferConfig> framebuffer) override; VideoCore::RasterizerInterface* ReadRasterizer() override { return &m_rasterizer; diff --git a/src/video_core/renderer_opengl/gl_blit_screen.cpp b/src/video_core/renderer_opengl/gl_blit_screen.cpp new file mode 100644 index 000000000..6ba8b214b --- /dev/null +++ b/src/video_core/renderer_opengl/gl_blit_screen.cpp @@ -0,0 +1,96 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/settings.h" +#include "video_core/renderer_opengl/gl_blit_screen.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/renderer_opengl/present/filters.h" +#include "video_core/renderer_opengl/present/layer.h" +#include "video_core/renderer_opengl/present/window_adapt_pass.h" + +namespace OpenGL { + +BlitScreen::BlitScreen(RasterizerOpenGL& rasterizer_, + Tegra::MaxwellDeviceMemoryManager& device_memory_, + StateTracker& state_tracker_, ProgramManager& program_manager_, + Device& device_) + : rasterizer(rasterizer_), device_memory(device_memory_), state_tracker(state_tracker_), + program_manager(program_manager_), device(device_) {} + +BlitScreen::~BlitScreen() = default; + +void BlitScreen::DrawScreen(std::span<const Tegra::FramebufferConfig> framebuffers, + const Layout::FramebufferLayout& layout) { + // TODO: Signal state tracker about these changes + state_tracker.NotifyScreenDrawVertexArray(); + state_tracker.NotifyPolygonModes(); + state_tracker.NotifyViewport0(); + state_tracker.NotifyScissor0(); + state_tracker.NotifyColorMask(0); + state_tracker.NotifyBlend0(); + state_tracker.NotifyFramebuffer(); + state_tracker.NotifyFrontFace(); + state_tracker.NotifyCullTest(); + state_tracker.NotifyDepthTest(); + state_tracker.NotifyStencilTest(); + state_tracker.NotifyPolygonOffset(); + state_tracker.NotifyRasterizeEnable(); + state_tracker.NotifyFramebufferSRGB(); + state_tracker.NotifyLogicOp(); + state_tracker.NotifyClipControl(); + state_tracker.NotifyAlphaTest(); + state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); + + glEnable(GL_CULL_FACE); + glDisable(GL_COLOR_LOGIC_OP); + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glDisable(GL_POLYGON_OFFSET_FILL); + glDisable(GL_RASTERIZER_DISCARD); + glDisable(GL_ALPHA_TEST); + glDisablei(GL_BLEND, 0); + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + glCullFace(GL_BACK); + glFrontFace(GL_CW); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glDepthRangeIndexed(0, 0.0, 0.0); + + while (layers.size() < framebuffers.size()) { + layers.emplace_back(rasterizer, device_memory); + } + + CreateWindowAdapt(); + window_adapt->DrawToFramebuffer(program_manager, layers, framebuffers, layout); + + // TODO + // program_manager.RestoreGuestPipeline(); +} + +void BlitScreen::CreateWindowAdapt() { + if (window_adapt && Settings::values.scaling_filter.GetValue() == current_window_adapt) { + return; + } + + current_window_adapt = Settings::values.scaling_filter.GetValue(); + switch (current_window_adapt) { + case Settings::ScalingFilter::NearestNeighbor: + window_adapt = MakeNearestNeighbor(device); + break; + case Settings::ScalingFilter::Bicubic: + window_adapt = MakeBicubic(device); + break; + case Settings::ScalingFilter::Gaussian: + window_adapt = MakeGaussian(device); + break; + case Settings::ScalingFilter::ScaleForce: + window_adapt = MakeScaleForce(device); + break; + case Settings::ScalingFilter::Fsr: + case Settings::ScalingFilter::Bilinear: + default: + window_adapt = MakeBilinear(device); + break; + } +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_blit_screen.h b/src/video_core/renderer_opengl/gl_blit_screen.h new file mode 100644 index 000000000..0c3d838f1 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_blit_screen.h @@ -0,0 +1,71 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <list> +#include <memory> +#include <span> + +#include "core/hle/service/nvnflinger/pixel_format.h" +#include "video_core/host1x/gpu_device_memory_manager.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace Layout { +struct FramebufferLayout; +} + +namespace Tegra { +struct FramebufferConfig; +} + +namespace Settings { +enum class ScalingFilter : u32; +} + +namespace OpenGL { + +class Device; +class Layer; +class ProgramManager; +class RasterizerOpenGL; +class StateTracker; +class WindowAdaptPass; + +/// Structure used for storing information about the display target for the Switch screen +struct FramebufferTextureInfo { + GLuint display_texture{}; + u32 width; + u32 height; + u32 scaled_width; + u32 scaled_height; +}; + +class BlitScreen { +public: + explicit BlitScreen(RasterizerOpenGL& rasterizer, + Tegra::MaxwellDeviceMemoryManager& device_memory, + StateTracker& state_tracker, ProgramManager& program_manager, + Device& device); + ~BlitScreen(); + + /// Draws the emulated screens to the emulator window. + void DrawScreen(std::span<const Tegra::FramebufferConfig> framebuffers, + const Layout::FramebufferLayout& layout); + +private: + void CreateWindowAdapt(); + + RasterizerOpenGL& rasterizer; + Tegra::MaxwellDeviceMemoryManager& device_memory; + StateTracker& state_tracker; + ProgramManager& program_manager; + Device& device; + + Settings::ScalingFilter current_window_adapt{}; + std::unique_ptr<WindowAdaptPass> window_adapt; + + std::list<Layer> layers; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_fsr.cpp b/src/video_core/renderer_opengl/gl_fsr.cpp deleted file mode 100644 index 77262dcf1..000000000 --- a/src/video_core/renderer_opengl/gl_fsr.cpp +++ /dev/null @@ -1,101 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "common/settings.h" -#include "video_core/fsr.h" -#include "video_core/renderer_opengl/gl_fsr.h" -#include "video_core/renderer_opengl/gl_shader_manager.h" -#include "video_core/renderer_opengl/gl_shader_util.h" - -namespace OpenGL { -using namespace FSR; - -using FsrConstants = std::array<u32, 4 * 4>; - -FSR::FSR(std::string_view fsr_vertex_source, std::string_view fsr_easu_source, - std::string_view fsr_rcas_source) - : fsr_vertex{CreateProgram(fsr_vertex_source, GL_VERTEX_SHADER)}, - fsr_easu_frag{CreateProgram(fsr_easu_source, GL_FRAGMENT_SHADER)}, - fsr_rcas_frag{CreateProgram(fsr_rcas_source, GL_FRAGMENT_SHADER)} { - glProgramUniform2f(fsr_vertex.handle, 0, 1.0f, 1.0f); - glProgramUniform2f(fsr_vertex.handle, 1, 0.0f, 0.0f); -} - -FSR::~FSR() = default; - -void FSR::Draw(ProgramManager& program_manager, const Common::Rectangle<u32>& screen, - u32 input_image_width, u32 input_image_height, - const Common::Rectangle<int>& crop_rect) { - - const auto output_image_width = screen.GetWidth(); - const auto output_image_height = screen.GetHeight(); - - if (fsr_intermediate_tex.handle) { - GLint fsr_tex_width, fsr_tex_height; - glGetTextureLevelParameteriv(fsr_intermediate_tex.handle, 0, GL_TEXTURE_WIDTH, - &fsr_tex_width); - glGetTextureLevelParameteriv(fsr_intermediate_tex.handle, 0, GL_TEXTURE_HEIGHT, - &fsr_tex_height); - if (static_cast<u32>(fsr_tex_width) != output_image_width || - static_cast<u32>(fsr_tex_height) != output_image_height) { - fsr_intermediate_tex.Release(); - } - } - if (!fsr_intermediate_tex.handle) { - fsr_intermediate_tex.Create(GL_TEXTURE_2D); - glTextureStorage2D(fsr_intermediate_tex.handle, 1, GL_RGB16F, output_image_width, - output_image_height); - glNamedFramebufferTexture(fsr_framebuffer.handle, GL_COLOR_ATTACHMENT0, - fsr_intermediate_tex.handle, 0); - } - - GLint old_draw_fb; - glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); - - glFrontFace(GL_CW); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fsr_framebuffer.handle); - glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(output_image_width), - static_cast<GLfloat>(output_image_height)); - - FsrConstants constants; - FsrEasuConOffset( - constants.data() + 0, constants.data() + 4, constants.data() + 8, constants.data() + 12, - - static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()), - static_cast<f32>(input_image_width), static_cast<f32>(input_image_height), - static_cast<f32>(output_image_width), static_cast<f32>(output_image_height), - static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top)); - - glProgramUniform4uiv(fsr_easu_frag.handle, 0, sizeof(constants), std::data(constants)); - - program_manager.BindPresentPrograms(fsr_vertex.handle, fsr_easu_frag.handle); - glDrawArrays(GL_TRIANGLES, 0, 3); - - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); - glBindTextureUnit(0, fsr_intermediate_tex.handle); - - const float sharpening = - static_cast<float>(Settings::values.fsr_sharpening_slider.GetValue()) / 100.0f; - - FsrRcasCon(constants.data(), sharpening); - glProgramUniform4uiv(fsr_rcas_frag.handle, 0, sizeof(constants), std::data(constants)); -} - -void FSR::InitBuffers() { - fsr_framebuffer.Create(); -} - -void FSR::ReleaseBuffers() { - fsr_framebuffer.Release(); - fsr_intermediate_tex.Release(); -} - -const OGLProgram& FSR::GetPresentFragmentProgram() const noexcept { - return fsr_rcas_frag; -} - -bool FSR::AreBuffersInitialized() const noexcept { - return fsr_framebuffer.handle; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_fsr.h b/src/video_core/renderer_opengl/gl_fsr.h deleted file mode 100644 index 1f6ae3115..000000000 --- a/src/video_core/renderer_opengl/gl_fsr.h +++ /dev/null @@ -1,43 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include <string_view> - -#include "common/common_types.h" -#include "common/math_util.h" -#include "video_core/fsr.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" - -namespace OpenGL { - -class ProgramManager; - -class FSR { -public: - explicit FSR(std::string_view fsr_vertex_source, std::string_view fsr_easu_source, - std::string_view fsr_rcas_source); - ~FSR(); - - void Draw(ProgramManager& program_manager, const Common::Rectangle<u32>& screen, - u32 input_image_width, u32 input_image_height, - const Common::Rectangle<int>& crop_rect); - - void InitBuffers(); - - void ReleaseBuffers(); - - [[nodiscard]] const OGLProgram& GetPresentFragmentProgram() const noexcept; - - [[nodiscard]] bool AreBuffersInitialized() const noexcept; - -private: - OGLFramebuffer fsr_framebuffer; - OGLProgram fsr_vertex; - OGLProgram fsr_easu_frag; - OGLProgram fsr_rcas_frag; - OGLTexture fsr_intermediate_tex; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d5354ef2d..b42fb110c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -71,10 +71,10 @@ std::optional<VideoCore::QueryType> MaxwellToVideoCoreQuery(VideoCommon::QueryTy RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::MaxwellDeviceMemoryManager& device_memory_, - const Device& device_, ScreenInfo& screen_info_, - ProgramManager& program_manager_, StateTracker& state_tracker_) - : gpu(gpu_), device_memory(device_memory_), device(device_), screen_info(screen_info_), - program_manager(program_manager_), state_tracker(state_tracker_), + const Device& device_, ProgramManager& program_manager_, + StateTracker& state_tracker_) + : gpu(gpu_), device_memory(device_memory_), device(device_), program_manager(program_manager_), + state_tracker(state_tracker_), texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool), texture_cache(texture_cache_runtime, device_memory_), buffer_cache_runtime(device, staging_buffer_pool), @@ -739,27 +739,29 @@ void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si query_cache.InvalidateRegion(*cpu_addr, copy_size); } -bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, - DAddr framebuffer_addr, u32 pixel_stride) { +std::optional<FramebufferTextureInfo> RasterizerOpenGL::AccelerateDisplay( + const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, u32 pixel_stride) { if (framebuffer_addr == 0) { - return false; + return {}; } MICROPROFILE_SCOPE(OpenGL_CacheManagement); std::scoped_lock lock{texture_cache.mutex}; - ImageView* const image_view{ - texture_cache.TryFindFramebufferImageView(config, framebuffer_addr)}; + const auto [image_view, scaled] = + texture_cache.TryFindFramebufferImageView(config, framebuffer_addr); if (!image_view) { - return false; + return {}; } - // Verify that the cached surface is the same size and format as the requested framebuffer - // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different"); - // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different"); - screen_info.texture.width = image_view->size.width; - screen_info.texture.height = image_view->size.height; - screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D); - return true; + const auto& resolution = Settings::values.resolution_info; + + FramebufferTextureInfo info{}; + info.display_texture = image_view->Handle(Shader::TextureType::Color2D); + info.width = image_view->size.width; + info.height = image_view->size.height; + info.scaled_width = scaled ? resolution.ScaleUp(info.width) : info.width; + info.scaled_height = scaled ? resolution.ScaleUp(info.height) : info.height; + return info; } void RasterizerOpenGL::SyncState() { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 34aa73526..6eae51ff7 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -16,6 +16,7 @@ #include "video_core/engines/maxwell_dma.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/blit_image.h" +#include "video_core/renderer_opengl/gl_blit_screen.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_fence_manager.h" @@ -37,7 +38,7 @@ class MemoryManager; namespace OpenGL { -struct ScreenInfo; +struct FramebufferTextureInfo; struct ShaderEntries; struct BindlessSSBO { @@ -76,8 +77,8 @@ class RasterizerOpenGL : public VideoCore::RasterizerInterface, public: explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::MaxwellDeviceMemoryManager& device_memory_, - const Device& device_, ScreenInfo& screen_info_, - ProgramManager& program_manager_, StateTracker& state_tracker_); + const Device& device_, ProgramManager& program_manager_, + StateTracker& state_tracker_); ~RasterizerOpenGL() override; void Draw(bool is_indexed, u32 instance_count) override; @@ -122,8 +123,6 @@ public: Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, std::span<const u8> memory) override; - bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, - u32 pixel_stride) override; void LoadDiskResources(u64 title_id, std::stop_token stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; @@ -144,6 +143,10 @@ public: return true; } + std::optional<FramebufferTextureInfo> AccelerateDisplay(const Tegra::FramebufferConfig& config, + VAddr framebuffer_addr, + u32 pixel_stride); + private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; @@ -237,7 +240,6 @@ private: Tegra::MaxwellDeviceMemoryManager& device_memory; const Device& device; - ScreenInfo& screen_info; ProgramManager& program_manager; StateTracker& state_tracker; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 66a5ca03e..be14494ca 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -1051,6 +1051,10 @@ void Image::Scale(bool up_scale) { state_tracker.NotifyScissor0(); } +bool Image::IsRescaled() const { + return True(flags & ImageFlagBits::Rescaled); +} + bool Image::ScaleUp(bool ignore) { const auto& resolution = runtime->resolution; if (!resolution.active) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 34870c81f..3e54edcc2 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -217,6 +217,8 @@ public: return gl_type; } + bool IsRescaled() const; + bool ScaleUp(bool ignore = false); bool ScaleDown(bool ignore = false); diff --git a/src/video_core/renderer_opengl/present/filters.cpp b/src/video_core/renderer_opengl/present/filters.cpp new file mode 100644 index 000000000..819e5d77f --- /dev/null +++ b/src/video_core/renderer_opengl/present/filters.cpp @@ -0,0 +1,39 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "video_core/host_shaders/opengl_present_frag.h" +#include "video_core/host_shaders/opengl_present_scaleforce_frag.h" +#include "video_core/host_shaders/present_bicubic_frag.h" +#include "video_core/host_shaders/present_gaussian_frag.h" +#include "video_core/renderer_opengl/present/filters.h" +#include "video_core/renderer_opengl/present/util.h" + +namespace OpenGL { + +std::unique_ptr<WindowAdaptPass> MakeNearestNeighbor(const Device& device) { + return std::make_unique<WindowAdaptPass>(device, CreateNearestNeighborSampler(), + HostShaders::OPENGL_PRESENT_FRAG); +} + +std::unique_ptr<WindowAdaptPass> MakeBilinear(const Device& device) { + return std::make_unique<WindowAdaptPass>(device, CreateBilinearSampler(), + HostShaders::OPENGL_PRESENT_FRAG); +} + +std::unique_ptr<WindowAdaptPass> MakeBicubic(const Device& device) { + return std::make_unique<WindowAdaptPass>(device, CreateBilinearSampler(), + HostShaders::PRESENT_BICUBIC_FRAG); +} + +std::unique_ptr<WindowAdaptPass> MakeGaussian(const Device& device) { + return std::make_unique<WindowAdaptPass>(device, CreateBilinearSampler(), + HostShaders::PRESENT_GAUSSIAN_FRAG); +} + +std::unique_ptr<WindowAdaptPass> MakeScaleForce(const Device& device) { + return std::make_unique<WindowAdaptPass>( + device, CreateBilinearSampler(), + fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG)); +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/filters.h b/src/video_core/renderer_opengl/present/filters.h new file mode 100644 index 000000000..122ab7436 --- /dev/null +++ b/src/video_core/renderer_opengl/present/filters.h @@ -0,0 +1,17 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <memory> +#include "video_core/renderer_opengl/present/window_adapt_pass.h" + +namespace OpenGL { + +std::unique_ptr<WindowAdaptPass> MakeNearestNeighbor(const Device& device); +std::unique_ptr<WindowAdaptPass> MakeBilinear(const Device& device); +std::unique_ptr<WindowAdaptPass> MakeBicubic(const Device& device); +std::unique_ptr<WindowAdaptPass> MakeGaussian(const Device& device); +std::unique_ptr<WindowAdaptPass> MakeScaleForce(const Device& device); + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/fsr.cpp b/src/video_core/renderer_opengl/present/fsr.cpp new file mode 100644 index 000000000..b764aadae --- /dev/null +++ b/src/video_core/renderer_opengl/present/fsr.cpp @@ -0,0 +1,98 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/settings.h" +#include "video_core/fsr.h" +#include "video_core/host_shaders/ffx_a_h.h" +#include "video_core/host_shaders/ffx_fsr1_h.h" +#include "video_core/host_shaders/full_screen_triangle_vert.h" +#include "video_core/host_shaders/opengl_fidelityfx_fsr_easu_frag.h" +#include "video_core/host_shaders/opengl_fidelityfx_fsr_frag.h" +#include "video_core/host_shaders/opengl_fidelityfx_fsr_rcas_frag.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" +#include "video_core/renderer_opengl/present/fsr.h" +#include "video_core/renderer_opengl/present/util.h" + +namespace OpenGL { +using namespace FSR; + +using FsrConstants = std::array<u32, 4 * 4>; + +FSR::FSR(u32 output_width_, u32 output_height_) : width(output_width_), height(output_height_) { + std::string fsr_source{HostShaders::OPENGL_FIDELITYFX_FSR_FRAG}; + ReplaceInclude(fsr_source, "ffx_a.h", HostShaders::FFX_A_H); + ReplaceInclude(fsr_source, "ffx_fsr1.h", HostShaders::FFX_FSR1_H); + + std::string fsr_easu_source{HostShaders::OPENGL_FIDELITYFX_FSR_EASU_FRAG}; + std::string fsr_rcas_source{HostShaders::OPENGL_FIDELITYFX_FSR_RCAS_FRAG}; + ReplaceInclude(fsr_easu_source, "opengl_fidelityfx_fsr.frag", fsr_source); + ReplaceInclude(fsr_rcas_source, "opengl_fidelityfx_fsr.frag", fsr_source); + + vert = CreateProgram(HostShaders::FULL_SCREEN_TRIANGLE_VERT, GL_VERTEX_SHADER); + easu_frag = CreateProgram(fsr_easu_source, GL_FRAGMENT_SHADER); + rcas_frag = CreateProgram(fsr_rcas_source, GL_FRAGMENT_SHADER); + + glProgramUniform2f(vert.handle, 0, 1.0f, -1.0f); + glProgramUniform2f(vert.handle, 1, 0.0f, 1.0f); + + sampler = CreateBilinearSampler(); + framebuffer.Create(); + + easu_tex.Create(GL_TEXTURE_2D); + glTextureStorage2D(easu_tex.handle, 1, GL_RGBA16F, width, height); + + rcas_tex.Create(GL_TEXTURE_2D); + glTextureStorage2D(rcas_tex.handle, 1, GL_RGBA16F, width, height); +} + +FSR::~FSR() = default; + +GLuint FSR::Draw(ProgramManager& program_manager, GLuint texture, u32 input_image_width, + u32 input_image_height, const Common::Rectangle<f32>& crop_rect) { + const f32 input_width = static_cast<f32>(input_image_width); + const f32 input_height = static_cast<f32>(input_image_height); + const f32 output_width = static_cast<f32>(width); + const f32 output_height = static_cast<f32>(height); + const f32 viewport_width = (crop_rect.right - crop_rect.left) * input_width; + const f32 viewport_x = crop_rect.left * input_width; + const f32 viewport_height = (crop_rect.bottom - crop_rect.top) * input_height; + const f32 viewport_y = crop_rect.top * input_height; + + FsrConstants easu_con{}; + FsrConstants rcas_con{}; + + FsrEasuConOffset(easu_con.data() + 0, easu_con.data() + 4, easu_con.data() + 8, + easu_con.data() + 12, viewport_width, viewport_height, input_width, + input_height, output_width, output_height, viewport_x, viewport_y); + + const float sharpening = + static_cast<float>(Settings::values.fsr_sharpening_slider.GetValue()) / 100.0f; + + FsrRcasCon(rcas_con.data(), sharpening); + + glProgramUniform4uiv(easu_frag.handle, 0, sizeof(easu_con), easu_con.data()); + glProgramUniform4uiv(rcas_frag.handle, 0, sizeof(rcas_con), rcas_con.data()); + + glFrontFace(GL_CW); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle); + glNamedFramebufferTexture(framebuffer.handle, GL_COLOR_ATTACHMENT0, easu_tex.handle, 0); + glViewportIndexedf(0, 0.0f, 0.0f, output_width, output_height); + program_manager.BindPresentPrograms(vert.handle, easu_frag.handle); + glBindTextureUnit(0, texture); + glBindSampler(0, sampler.handle); + glDrawArrays(GL_TRIANGLES, 0, 3); + + glNamedFramebufferTexture(framebuffer.handle, GL_COLOR_ATTACHMENT0, rcas_tex.handle, 0); + program_manager.BindPresentPrograms(vert.handle, rcas_frag.handle); + glBindTextureUnit(0, easu_tex.handle); + glDrawArrays(GL_TRIANGLES, 0, 3); + + return rcas_tex.handle; +} + +bool FSR::NeedsRecreation(const Common::Rectangle<u32>& screen) { + return screen.GetWidth() != width || screen.GetHeight() != height; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/fsr.h b/src/video_core/renderer_opengl/present/fsr.h new file mode 100644 index 000000000..606935a01 --- /dev/null +++ b/src/video_core/renderer_opengl/present/fsr.h @@ -0,0 +1,39 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <string_view> + +#include "common/common_types.h" +#include "common/math_util.h" +#include "video_core/fsr.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace OpenGL { + +class ProgramManager; + +class FSR { +public: + explicit FSR(u32 output_width, u32 output_height); + ~FSR(); + + GLuint Draw(ProgramManager& program_manager, GLuint texture, u32 input_image_width, + u32 input_image_height, const Common::Rectangle<f32>& crop_rect); + + bool NeedsRecreation(const Common::Rectangle<u32>& screen); + +private: + const u32 width; + const u32 height; + OGLFramebuffer framebuffer; + OGLSampler sampler; + OGLProgram vert; + OGLProgram easu_frag; + OGLProgram rcas_frag; + OGLTexture easu_tex; + OGLTexture rcas_tex; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/fxaa.cpp b/src/video_core/renderer_opengl/present/fxaa.cpp new file mode 100644 index 000000000..d9b58512d --- /dev/null +++ b/src/video_core/renderer_opengl/present/fxaa.cpp @@ -0,0 +1,41 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "video_core/host_shaders/fxaa_frag.h" +#include "video_core/host_shaders/fxaa_vert.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" +#include "video_core/renderer_opengl/present/fxaa.h" +#include "video_core/renderer_opengl/present/util.h" + +namespace OpenGL { + +FXAA::FXAA(u32 width, u32 height) { + vert_shader = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER); + frag_shader = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER); + + sampler = CreateBilinearSampler(); + + framebuffer.Create(); + + texture.Create(GL_TEXTURE_2D); + glTextureStorage2D(texture.handle, 1, GL_RGBA16F, width, height); + glNamedFramebufferTexture(framebuffer.handle, GL_COLOR_ATTACHMENT0, texture.handle, 0); +} + +FXAA::~FXAA() = default; + +GLuint FXAA::Draw(ProgramManager& program_manager, GLuint input_texture) { + glFrontFace(GL_CCW); + + program_manager.BindPresentPrograms(vert_shader.handle, frag_shader.handle); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle); + glBindTextureUnit(0, input_texture); + glBindSampler(0, sampler.handle); + glDrawArrays(GL_TRIANGLES, 0, 3); + glFrontFace(GL_CW); + + return texture.handle; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/fxaa.h b/src/video_core/renderer_opengl/present/fxaa.h new file mode 100644 index 000000000..b898198f1 --- /dev/null +++ b/src/video_core/renderer_opengl/present/fxaa.h @@ -0,0 +1,27 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace OpenGL { + +class ProgramManager; + +class FXAA { +public: + explicit FXAA(u32 width, u32 height); + ~FXAA(); + + GLuint Draw(ProgramManager& program_manager, GLuint input_texture); + +private: + OGLProgram vert_shader; + OGLProgram frag_shader; + OGLSampler sampler; + OGLFramebuffer framebuffer; + OGLTexture texture; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/layer.cpp b/src/video_core/renderer_opengl/present/layer.cpp new file mode 100644 index 000000000..8643e07c6 --- /dev/null +++ b/src/video_core/renderer_opengl/present/layer.cpp @@ -0,0 +1,215 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "video_core/framebuffer_config.h" +#include "video_core/renderer_opengl/gl_blit_screen.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_opengl/present/fsr.h" +#include "video_core/renderer_opengl/present/fxaa.h" +#include "video_core/renderer_opengl/present/layer.h" +#include "video_core/renderer_opengl/present/present_uniforms.h" +#include "video_core/renderer_opengl/present/smaa.h" +#include "video_core/surface.h" +#include "video_core/textures/decoders.h" + +namespace OpenGL { + +Layer::Layer(RasterizerOpenGL& rasterizer_, Tegra::MaxwellDeviceMemoryManager& device_memory_) + : rasterizer(rasterizer_), device_memory(device_memory_) { + // Allocate textures for the screen + framebuffer_texture.resource.Create(GL_TEXTURE_2D); + + const GLuint texture = framebuffer_texture.resource.handle; + glTextureStorage2D(texture, 1, GL_RGBA8, 1, 1); + + // Clear screen to black + const u8 framebuffer_data[4] = {0, 0, 0, 0}; + glClearTexImage(framebuffer_texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, + framebuffer_data); +} + +Layer::~Layer() = default; + +GLuint Layer::ConfigureDraw(std::array<GLfloat, 3 * 2>& out_matrix, + std::array<ScreenRectVertex, 4>& out_vertices, + ProgramManager& program_manager, + const Tegra::FramebufferConfig& framebuffer, + const Layout::FramebufferLayout& layout) { + FramebufferTextureInfo info = PrepareRenderTarget(framebuffer); + auto crop = Tegra::NormalizeCrop(framebuffer, info.width, info.height); + GLuint texture = info.display_texture; + + auto anti_aliasing = Settings::values.anti_aliasing.GetValue(); + if (anti_aliasing != Settings::AntiAliasing::None) { + glEnablei(GL_SCISSOR_TEST, 0); + auto viewport_width = Settings::values.resolution_info.ScaleUp(framebuffer_texture.width); + auto viewport_height = Settings::values.resolution_info.ScaleUp(framebuffer_texture.height); + + glScissorIndexed(0, 0, 0, viewport_width, viewport_height); + glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(viewport_width), + static_cast<GLfloat>(viewport_height)); + + switch (anti_aliasing) { + case Settings::AntiAliasing::Fxaa: + CreateFXAA(); + texture = fxaa->Draw(program_manager, info.display_texture); + break; + case Settings::AntiAliasing::Smaa: + default: + CreateSMAA(); + texture = smaa->Draw(program_manager, info.display_texture); + break; + } + } + + glDisablei(GL_SCISSOR_TEST, 0); + + if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { + if (!fsr || fsr->NeedsRecreation(layout.screen)) { + fsr = std::make_unique<FSR>(layout.screen.GetWidth(), layout.screen.GetHeight()); + } + + texture = fsr->Draw(program_manager, texture, info.scaled_width, info.scaled_height, crop); + crop = {0, 0, 1, 1}; + } + + out_matrix = + MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); + + // Map the coordinates to the screen. + const auto& screen = layout.screen; + const auto x = screen.left; + const auto y = screen.top; + const auto w = screen.GetWidth(); + const auto h = screen.GetHeight(); + + out_vertices[0] = ScreenRectVertex(x, y, crop.left, crop.top); + out_vertices[1] = ScreenRectVertex(x + w, y, crop.right, crop.top); + out_vertices[2] = ScreenRectVertex(x, y + h, crop.left, crop.bottom); + out_vertices[3] = ScreenRectVertex(x + w, y + h, crop.right, crop.bottom); + + return texture; +} + +FramebufferTextureInfo Layer::PrepareRenderTarget(const Tegra::FramebufferConfig& framebuffer) { + // If framebuffer is provided, reload it from memory to a texture + if (framebuffer_texture.width != static_cast<GLsizei>(framebuffer.width) || + framebuffer_texture.height != static_cast<GLsizei>(framebuffer.height) || + framebuffer_texture.pixel_format != framebuffer.pixel_format || + gl_framebuffer_data.empty()) { + // Reallocate texture if the framebuffer size has changed. + // This is expected to not happen very often and hence should not be a + // performance problem. + ConfigureFramebufferTexture(framebuffer); + } + + // Load the framebuffer from memory if needed + return LoadFBToScreenInfo(framebuffer); +} + +FramebufferTextureInfo Layer::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) { + const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; + const auto accelerated_info = + rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride); + if (accelerated_info) { + return *accelerated_info; + } + + // Reset the screen info's display texture to its own permanent texture + FramebufferTextureInfo info{}; + info.display_texture = framebuffer_texture.resource.handle; + info.width = framebuffer.width; + info.height = framebuffer.height; + info.scaled_width = framebuffer.width; + info.scaled_height = framebuffer.height; + + // TODO(Rodrigo): Read this from HLE + constexpr u32 block_height_log2 = 4; + const auto pixel_format{ + VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; + const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; + const u64 size_in_bytes{Tegra::Texture::CalculateSize( + true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; + const u8* const host_ptr{device_memory.GetPointer<u8>(framebuffer_addr)}; + const std::span<const u8> input_data(host_ptr, size_in_bytes); + Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, + framebuffer.width, framebuffer.height, 1, block_height_log2, + 0); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride)); + + // Update existing texture + // TODO: Test what happens on hardware when you change the framebuffer dimensions so that + // they differ from the LCD resolution. + // TODO: Applications could theoretically crash yuzu here by specifying too large + // framebuffer sizes. We should make sure that this cannot happen. + glTextureSubImage2D(framebuffer_texture.resource.handle, 0, 0, 0, framebuffer.width, + framebuffer.height, framebuffer_texture.gl_format, + framebuffer_texture.gl_type, gl_framebuffer_data.data()); + + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + + return info; +} + +void Layer::ConfigureFramebufferTexture(const Tegra::FramebufferConfig& framebuffer) { + framebuffer_texture.width = framebuffer.width; + framebuffer_texture.height = framebuffer.height; + framebuffer_texture.pixel_format = framebuffer.pixel_format; + + const auto pixel_format{ + VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; + const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; + gl_framebuffer_data.resize(framebuffer_texture.width * framebuffer_texture.height * + bytes_per_pixel); + + GLint internal_format; + switch (framebuffer.pixel_format) { + case Service::android::PixelFormat::Rgba8888: + internal_format = GL_RGBA8; + framebuffer_texture.gl_format = GL_RGBA; + framebuffer_texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; + break; + case Service::android::PixelFormat::Rgb565: + internal_format = GL_RGB565; + framebuffer_texture.gl_format = GL_RGB; + framebuffer_texture.gl_type = GL_UNSIGNED_SHORT_5_6_5; + break; + default: + internal_format = GL_RGBA8; + framebuffer_texture.gl_format = GL_RGBA; + framebuffer_texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; + // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", + // static_cast<u32>(framebuffer.pixel_format)); + break; + } + + framebuffer_texture.resource.Release(); + framebuffer_texture.resource.Create(GL_TEXTURE_2D); + glTextureStorage2D(framebuffer_texture.resource.handle, 1, internal_format, + framebuffer_texture.width, framebuffer_texture.height); + + fxaa.reset(); + smaa.reset(); +} + +void Layer::CreateFXAA() { + smaa.reset(); + if (!fxaa) { + fxaa = std::make_unique<FXAA>( + Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), + Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); + } +} + +void Layer::CreateSMAA() { + fxaa.reset(); + if (!smaa) { + smaa = std::make_unique<SMAA>( + Settings::values.resolution_info.ScaleUp(framebuffer_texture.width), + Settings::values.resolution_info.ScaleUp(framebuffer_texture.height)); + } +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/layer.h b/src/video_core/renderer_opengl/present/layer.h new file mode 100644 index 000000000..ef1055abf --- /dev/null +++ b/src/video_core/renderer_opengl/present/layer.h @@ -0,0 +1,80 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <memory> +#include <vector> + +#include "video_core/host1x/gpu_device_memory_manager.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace Layout { +struct FramebufferLayout; +} + +namespace Service::android { +enum class PixelFormat : u32; +}; + +namespace Tegra { +struct FramebufferConfig; +} + +namespace OpenGL { + +struct FramebufferTextureInfo; +class FSR; +class FXAA; +class ProgramManager; +class RasterizerOpenGL; +class SMAA; + +/// Structure used for storing information about the textures for the Switch screen +struct TextureInfo { + OGLTexture resource; + GLsizei width; + GLsizei height; + GLenum gl_format; + GLenum gl_type; + Service::android::PixelFormat pixel_format; +}; + +struct ScreenRectVertex; + +class Layer { +public: + explicit Layer(RasterizerOpenGL& rasterizer, Tegra::MaxwellDeviceMemoryManager& device_memory); + ~Layer(); + + GLuint ConfigureDraw(std::array<GLfloat, 3 * 2>& out_matrix, + std::array<ScreenRectVertex, 4>& out_vertices, + ProgramManager& program_manager, + const Tegra::FramebufferConfig& framebuffer, + const Layout::FramebufferLayout& layout); + +private: + /// Loads framebuffer from emulated memory into the active OpenGL texture. + FramebufferTextureInfo LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer); + FramebufferTextureInfo PrepareRenderTarget(const Tegra::FramebufferConfig& framebuffer); + void ConfigureFramebufferTexture(const Tegra::FramebufferConfig& framebuffer); + + void CreateFXAA(); + void CreateSMAA(); + +private: + RasterizerOpenGL& rasterizer; + Tegra::MaxwellDeviceMemoryManager& device_memory; + + /// OpenGL framebuffer data + std::vector<u8> gl_framebuffer_data; + + /// Display information for Switch screen + TextureInfo framebuffer_texture; + + std::unique_ptr<FSR> fsr; + std::unique_ptr<FXAA> fxaa; + std::unique_ptr<SMAA> smaa; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/present_uniforms.h b/src/video_core/renderer_opengl/present/present_uniforms.h new file mode 100644 index 000000000..3a19f05c7 --- /dev/null +++ b/src/video_core/renderer_opengl/present/present_uniforms.h @@ -0,0 +1,43 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace OpenGL { + +constexpr GLint PositionLocation = 0; +constexpr GLint TexCoordLocation = 1; +constexpr GLint ModelViewMatrixLocation = 0; + +struct ScreenRectVertex { + constexpr ScreenRectVertex() = default; + + constexpr ScreenRectVertex(u32 x, u32 y, GLfloat u, GLfloat v) + : position{{static_cast<GLfloat>(x), static_cast<GLfloat>(y)}}, tex_coord{{u, v}} {} + + std::array<GLfloat, 2> position{}; + std::array<GLfloat, 2> tex_coord{}; +}; + +/** + * Defines a 1:1 pixel orthographic projection matrix with (0,0) on the top-left + * corner and (width, height) on the lower-bottom. + * + * The projection part of the matrix is trivial, hence these operations are represented + * by a 3x2 matrix. + */ +static inline std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(float width, float height) { + std::array<GLfloat, 3 * 2> matrix; // Laid out in column-major order + + // clang-format off + matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; + matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; + // Last matrix row is implicitly assumed to be [0, 0, 1]. + // clang-format on + + return matrix; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/smaa.cpp b/src/video_core/renderer_opengl/present/smaa.cpp new file mode 100644 index 000000000..de7f6e502 --- /dev/null +++ b/src/video_core/renderer_opengl/present/smaa.cpp @@ -0,0 +1,102 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "video_core/host_shaders/opengl_smaa_glsl.h" +#include "video_core/host_shaders/smaa_blending_weight_calculation_frag.h" +#include "video_core/host_shaders/smaa_blending_weight_calculation_vert.h" +#include "video_core/host_shaders/smaa_edge_detection_frag.h" +#include "video_core/host_shaders/smaa_edge_detection_vert.h" +#include "video_core/host_shaders/smaa_neighborhood_blending_frag.h" +#include "video_core/host_shaders/smaa_neighborhood_blending_vert.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" +#include "video_core/renderer_opengl/present/smaa.h" +#include "video_core/renderer_opengl/present/util.h" +#include "video_core/smaa_area_tex.h" +#include "video_core/smaa_search_tex.h" + +namespace OpenGL { + +SMAA::SMAA(u32 width, u32 height) { + const auto SmaaShader = [&](std::string_view specialized_source, GLenum stage) { + std::string shader_source{specialized_source}; + ReplaceInclude(shader_source, "opengl_smaa.glsl", HostShaders::OPENGL_SMAA_GLSL); + return CreateProgram(shader_source, stage); + }; + + edge_detection_vert = SmaaShader(HostShaders::SMAA_EDGE_DETECTION_VERT, GL_VERTEX_SHADER); + edge_detection_frag = SmaaShader(HostShaders::SMAA_EDGE_DETECTION_FRAG, GL_FRAGMENT_SHADER); + blending_weight_calculation_vert = + SmaaShader(HostShaders::SMAA_BLENDING_WEIGHT_CALCULATION_VERT, GL_VERTEX_SHADER); + blending_weight_calculation_frag = + SmaaShader(HostShaders::SMAA_BLENDING_WEIGHT_CALCULATION_FRAG, GL_FRAGMENT_SHADER); + neighborhood_blending_vert = + SmaaShader(HostShaders::SMAA_NEIGHBORHOOD_BLENDING_VERT, GL_VERTEX_SHADER); + neighborhood_blending_frag = + SmaaShader(HostShaders::SMAA_NEIGHBORHOOD_BLENDING_FRAG, GL_FRAGMENT_SHADER); + + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + + area_tex.Create(GL_TEXTURE_2D); + glTextureStorage2D(area_tex.handle, 1, GL_RG8, AREATEX_WIDTH, AREATEX_HEIGHT); + glTextureSubImage2D(area_tex.handle, 0, 0, 0, AREATEX_WIDTH, AREATEX_HEIGHT, GL_RG, + GL_UNSIGNED_BYTE, areaTexBytes); + search_tex.Create(GL_TEXTURE_2D); + glTextureStorage2D(search_tex.handle, 1, GL_R8, SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT); + glTextureSubImage2D(search_tex.handle, 0, 0, 0, SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT, GL_RED, + GL_UNSIGNED_BYTE, searchTexBytes); + + edges_tex.Create(GL_TEXTURE_2D); + glTextureStorage2D(edges_tex.handle, 1, GL_RG16F, width, height); + + blend_tex.Create(GL_TEXTURE_2D); + glTextureStorage2D(blend_tex.handle, 1, GL_RGBA16F, width, height); + + sampler = CreateBilinearSampler(); + + framebuffer.Create(); + + texture.Create(GL_TEXTURE_2D); + glTextureStorage2D(texture.handle, 1, GL_RGBA16F, width, height); + glNamedFramebufferTexture(framebuffer.handle, GL_COLOR_ATTACHMENT0, texture.handle, 0); +} + +SMAA::~SMAA() = default; + +GLuint SMAA::Draw(ProgramManager& program_manager, GLuint input_texture) { + glClearColor(0, 0, 0, 0); + glFrontFace(GL_CCW); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle); + glBindSampler(0, sampler.handle); + glBindSampler(1, sampler.handle); + glBindSampler(2, sampler.handle); + + glBindTextureUnit(0, input_texture); + glNamedFramebufferTexture(framebuffer.handle, GL_COLOR_ATTACHMENT0, edges_tex.handle, 0); + glClear(GL_COLOR_BUFFER_BIT); + program_manager.BindPresentPrograms(edge_detection_vert.handle, edge_detection_frag.handle); + glDrawArrays(GL_TRIANGLES, 0, 3); + + glBindTextureUnit(0, edges_tex.handle); + glBindTextureUnit(1, area_tex.handle); + glBindTextureUnit(2, search_tex.handle); + glNamedFramebufferTexture(framebuffer.handle, GL_COLOR_ATTACHMENT0, blend_tex.handle, 0); + glClear(GL_COLOR_BUFFER_BIT); + program_manager.BindPresentPrograms(blending_weight_calculation_vert.handle, + blending_weight_calculation_frag.handle); + glDrawArrays(GL_TRIANGLES, 0, 3); + + glBindTextureUnit(0, input_texture); + glBindTextureUnit(1, blend_tex.handle); + glNamedFramebufferTexture(framebuffer.handle, GL_COLOR_ATTACHMENT0, texture.handle, 0); + program_manager.BindPresentPrograms(neighborhood_blending_vert.handle, + neighborhood_blending_frag.handle); + glClear(GL_COLOR_BUFFER_BIT); + glDrawArrays(GL_TRIANGLES, 0, 3); + glFrontFace(GL_CW); + + return texture.handle; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/smaa.h b/src/video_core/renderer_opengl/present/smaa.h new file mode 100644 index 000000000..a48cb4fa9 --- /dev/null +++ b/src/video_core/renderer_opengl/present/smaa.h @@ -0,0 +1,35 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace OpenGL { + +class ProgramManager; + +class SMAA { +public: + explicit SMAA(u32 width, u32 height); + ~SMAA(); + + GLuint Draw(ProgramManager& program_manager, GLuint input_texture); + +private: + OGLProgram edge_detection_vert; + OGLProgram blending_weight_calculation_vert; + OGLProgram neighborhood_blending_vert; + OGLProgram edge_detection_frag; + OGLProgram blending_weight_calculation_frag; + OGLProgram neighborhood_blending_frag; + OGLTexture area_tex; + OGLTexture search_tex; + OGLTexture edges_tex; + OGLTexture blend_tex; + OGLSampler sampler; + OGLFramebuffer framebuffer; + OGLTexture texture; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/util.h b/src/video_core/renderer_opengl/present/util.h new file mode 100644 index 000000000..67f03aa27 --- /dev/null +++ b/src/video_core/renderer_opengl/present/util.h @@ -0,0 +1,43 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <string> + +#include "common/assert.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace OpenGL { + +static inline void ReplaceInclude(std::string& shader_source, std::string_view include_name, + std::string_view include_content) { + const std::string include_string = fmt::format("#include \"{}\"", include_name); + const std::size_t pos = shader_source.find(include_string); + ASSERT(pos != std::string::npos); + shader_source.replace(pos, include_string.size(), include_content); +}; + +static inline OGLSampler CreateBilinearSampler() { + OGLSampler sampler; + sampler.Create(); + glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glSamplerParameteri(sampler.handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glSamplerParameteri(sampler.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glSamplerParameteri(sampler.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glSamplerParameteri(sampler.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); + return sampler; +} + +static inline OGLSampler CreateNearestNeighborSampler() { + OGLSampler sampler; + sampler.Create(); + glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glSamplerParameteri(sampler.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glSamplerParameteri(sampler.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glSamplerParameteri(sampler.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glSamplerParameteri(sampler.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); + return sampler; +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/window_adapt_pass.cpp b/src/video_core/renderer_opengl/present/window_adapt_pass.cpp new file mode 100644 index 000000000..4d681606b --- /dev/null +++ b/src/video_core/renderer_opengl/present/window_adapt_pass.cpp @@ -0,0 +1,103 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/settings.h" +#include "video_core/framebuffer_config.h" +#include "video_core/host_shaders/opengl_present_vert.h" +#include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_shader_util.h" +#include "video_core/renderer_opengl/present/layer.h" +#include "video_core/renderer_opengl/present/present_uniforms.h" +#include "video_core/renderer_opengl/present/window_adapt_pass.h" + +namespace OpenGL { + +WindowAdaptPass::WindowAdaptPass(const Device& device_, OGLSampler&& sampler_, + std::string_view frag_source) + : device(device_), sampler(std::move(sampler_)) { + vert = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); + frag = CreateProgram(frag_source, GL_FRAGMENT_SHADER); + + // Generate VBO handle for drawing + vertex_buffer.Create(); + + // Attach vertex data to VAO + glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW); + + // Query vertex buffer address when the driver supports unified vertex attributes + if (device.HasVertexBufferUnifiedMemory()) { + glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); + glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, + &vertex_buffer_address); + } +} + +WindowAdaptPass::~WindowAdaptPass() = default; + +void WindowAdaptPass::DrawToFramebuffer(ProgramManager& program_manager, std::list<Layer>& layers, + std::span<const Tegra::FramebufferConfig> framebuffers, + const Layout::FramebufferLayout& layout) { + GLint old_read_fb; + GLint old_draw_fb; + glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb); + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); + + const size_t layer_count = framebuffers.size(); + std::vector<GLuint> textures(layer_count); + std::vector<std::array<GLfloat, 3 * 2>> matrices(layer_count); + std::vector<std::array<ScreenRectVertex, 4>> vertices(layer_count); + + auto layer_it = layers.begin(); + for (size_t i = 0; i < layer_count; i++) { + textures[i] = layer_it->ConfigureDraw(matrices[i], vertices[i], program_manager, + framebuffers[i], layout); + layer_it++; + } + + glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); + + program_manager.BindPresentPrograms(vert.handle, frag.handle); + + glDisable(GL_FRAMEBUFFER_SRGB); + glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width), + static_cast<GLfloat>(layout.height)); + + glEnableVertexAttribArray(PositionLocation); + glEnableVertexAttribArray(TexCoordLocation); + glVertexAttribDivisor(PositionLocation, 0); + glVertexAttribDivisor(TexCoordLocation, 0); + glVertexAttribFormat(PositionLocation, 2, GL_FLOAT, GL_FALSE, + offsetof(ScreenRectVertex, position)); + glVertexAttribFormat(TexCoordLocation, 2, GL_FLOAT, GL_FALSE, + offsetof(ScreenRectVertex, tex_coord)); + glVertexAttribBinding(PositionLocation, 0); + glVertexAttribBinding(TexCoordLocation, 0); + if (device.HasVertexBufferUnifiedMemory()) { + glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex)); + glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address, + sizeof(decltype(vertices)::value_type)); + } else { + glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); + } + + glBindSampler(0, sampler.handle); + + // Update background color before drawing + glClearColor(Settings::values.bg_red.GetValue() / 255.0f, + Settings::values.bg_green.GetValue() / 255.0f, + Settings::values.bg_blue.GetValue() / 255.0f, 1.0f); + + glClear(GL_COLOR_BUFFER_BIT); + + for (size_t i = 0; i < layer_count; i++) { + glBindTextureUnit(0, textures[i]); + glProgramUniformMatrix3x2fv(vert.handle, ModelViewMatrixLocation, 1, GL_FALSE, + matrices[i].data()); + glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices[i]), std::data(vertices[i])); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + } +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/window_adapt_pass.h b/src/video_core/renderer_opengl/present/window_adapt_pass.h new file mode 100644 index 000000000..00975a9c6 --- /dev/null +++ b/src/video_core/renderer_opengl/present/window_adapt_pass.h @@ -0,0 +1,47 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <list> +#include <span> + +#include "common/math_util.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" + +namespace Layout { +struct FramebufferLayout; +} + +namespace Tegra { +struct FramebufferConfig; +} + +namespace OpenGL { + +class Device; +class Layer; +class ProgramManager; + +class WindowAdaptPass final { +public: + explicit WindowAdaptPass(const Device& device, OGLSampler&& sampler, + std::string_view frag_source); + ~WindowAdaptPass(); + + void DrawToFramebuffer(ProgramManager& program_manager, std::list<Layer>& layers, + std::span<const Tegra::FramebufferConfig> framebuffers, + const Layout::FramebufferLayout& layout); + +private: + const Device& device; + OGLSampler sampler; + OGLProgram vert; + OGLProgram frag; + OGLBuffer vertex_buffer; + + // GPU address of the vertex buffer + GLuint64EXT vertex_buffer_address = 0; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index b75376fdb..e33a32592 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -16,68 +16,15 @@ #include "core/core_timing.h" #include "core/frontend/emu_window.h" #include "core/telemetry_session.h" -#include "video_core/host_shaders/ffx_a_h.h" -#include "video_core/host_shaders/ffx_fsr1_h.h" -#include "video_core/host_shaders/full_screen_triangle_vert.h" -#include "video_core/host_shaders/fxaa_frag.h" -#include "video_core/host_shaders/fxaa_vert.h" -#include "video_core/host_shaders/opengl_fidelityfx_fsr_easu_frag.h" -#include "video_core/host_shaders/opengl_fidelityfx_fsr_frag.h" -#include "video_core/host_shaders/opengl_fidelityfx_fsr_rcas_frag.h" -#include "video_core/host_shaders/opengl_present_frag.h" -#include "video_core/host_shaders/opengl_present_scaleforce_frag.h" -#include "video_core/host_shaders/opengl_present_vert.h" -#include "video_core/host_shaders/opengl_smaa_glsl.h" -#include "video_core/host_shaders/present_bicubic_frag.h" -#include "video_core/host_shaders/present_gaussian_frag.h" -#include "video_core/host_shaders/smaa_blending_weight_calculation_frag.h" -#include "video_core/host_shaders/smaa_blending_weight_calculation_vert.h" -#include "video_core/host_shaders/smaa_edge_detection_frag.h" -#include "video_core/host_shaders/smaa_edge_detection_vert.h" -#include "video_core/host_shaders/smaa_neighborhood_blending_frag.h" -#include "video_core/host_shaders/smaa_neighborhood_blending_vert.h" -#include "video_core/renderer_opengl/gl_fsr.h" +#include "video_core/renderer_opengl/gl_blit_screen.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/renderer_opengl.h" -#include "video_core/smaa_area_tex.h" -#include "video_core/smaa_search_tex.h" #include "video_core/textures/decoders.h" namespace OpenGL { namespace { -constexpr GLint PositionLocation = 0; -constexpr GLint TexCoordLocation = 1; -constexpr GLint ModelViewMatrixLocation = 0; - -struct ScreenRectVertex { - constexpr ScreenRectVertex(u32 x, u32 y, GLfloat u, GLfloat v) - : position{{static_cast<GLfloat>(x), static_cast<GLfloat>(y)}}, tex_coord{{u, v}} {} - - std::array<GLfloat, 2> position; - std::array<GLfloat, 2> tex_coord; -}; - -/** - * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left - * corner and (width, height) on the lower-bottom. - * - * The projection part of the matrix is trivial, hence these operations are represented - * by a 3x2 matrix. - */ -std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(float width, float height) { - std::array<GLfloat, 3 * 2> matrix; // Laid out in column-major order - - // clang-format off - matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f; - matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f; - // Last matrix row is implicitly assumed to be [0, 0, 1]. - // clang-format on - - return matrix; -} - const char* GetSource(GLenum source) { switch (source) { case GL_DEBUG_SOURCE_API: @@ -148,15 +95,13 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, emu_window{emu_window_}, device_memory{device_memory_}, gpu{gpu_}, device{emu_window_}, state_tracker{}, program_manager{device}, - rasterizer(emu_window, gpu, device_memory, device, screen_info, program_manager, - state_tracker) { + rasterizer(emu_window, gpu, device_memory, device, program_manager, state_tracker) { if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); glDebugMessageCallback(DebugHandler, nullptr); } AddTelemetryFields(); - InitOpenGLObjects(); // Initialize default attributes to match hardware's disabled attributes GLint max_attribs{}; @@ -168,27 +113,27 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) { glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS); } - // Enable unified vertex attributes and query vertex buffer address when the driver supports it + + // Enable unified vertex attributes when the driver supports it if (device.HasVertexBufferUnifiedMemory()) { glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV); glEnableClientState(GL_ELEMENT_ARRAY_UNIFIED_NV); - glMakeNamedBufferResidentNV(vertex_buffer.handle, GL_READ_ONLY); - glGetNamedBufferParameterui64vNV(vertex_buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, - &vertex_buffer_address); } + blit_screen = std::make_unique<BlitScreen>(rasterizer, device_memory, state_tracker, + program_manager, device); } RendererOpenGL::~RendererOpenGL() = default; -void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - if (!framebuffer) { +void RendererOpenGL::Composite(std::span<const Tegra::FramebufferConfig> framebuffers) { + if (framebuffers.empty()) { return; } - PrepareRendertarget(framebuffer); - RenderScreenshot(); + + RenderScreenshot(framebuffers); state_tracker.BindFramebuffer(0); - DrawScreen(emu_window.GetFramebufferLayout()); + blit_screen->DrawScreen(framebuffers, emu_window.GetFramebufferLayout()); ++m_current_frame; @@ -199,172 +144,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { render_window.OnFrameDisplayed(); } -void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) { - if (!framebuffer) { - return; - } - // If framebuffer is provided, reload it from memory to a texture - if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) || - screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) || - screen_info.texture.pixel_format != framebuffer->pixel_format || - gl_framebuffer_data.empty()) { - // Reallocate texture if the framebuffer size has changed. - // This is expected to not happen very often and hence should not be a - // performance problem. - ConfigureFramebufferTexture(screen_info.texture, *framebuffer); - } - - // Load the framebuffer from memory, draw it to the screen, and swap buffers - LoadFBToScreenInfo(*framebuffer); -} - -void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) { - // Framebuffer orientation handling - framebuffer_transform_flags = framebuffer.transform_flags; - framebuffer_crop_rect = framebuffer.crop_rect; - framebuffer_width = framebuffer.width; - framebuffer_height = framebuffer.height; - - const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset}; - screen_info.was_accelerated = - rasterizer.AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride); - if (screen_info.was_accelerated) { - return; - } - - // Reset the screen info's display texture to its own permanent texture - screen_info.display_texture = screen_info.texture.resource.handle; - - // TODO(Rodrigo): Read this from HLE - constexpr u32 block_height_log2 = 4; - const auto pixel_format{ - VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; - const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; - const u64 size_in_bytes{Tegra::Texture::CalculateSize( - true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; - const u8* const host_ptr{device_memory.GetPointer<u8>(framebuffer_addr)}; - const std::span<const u8> input_data(host_ptr, size_in_bytes); - Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel, - framebuffer.width, framebuffer.height, 1, block_height_log2, - 0); - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride)); - - // Update existing texture - // TODO: Test what happens on hardware when you change the framebuffer dimensions so that - // they differ from the LCD resolution. - // TODO: Applications could theoretically crash yuzu here by specifying too large - // framebuffer sizes. We should make sure that this cannot happen. - glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width, - framebuffer.height, screen_info.texture.gl_format, - screen_info.texture.gl_type, gl_framebuffer_data.data()); - - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); -} - -void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, - const TextureInfo& texture) { - const u8 framebuffer_data[4] = {color_a, color_b, color_g, color_r}; - glClearTexImage(texture.resource.handle, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data); -} - -void RendererOpenGL::InitOpenGLObjects() { - // Create shader programs - fxaa_vertex = CreateProgram(HostShaders::FXAA_VERT, GL_VERTEX_SHADER); - fxaa_fragment = CreateProgram(HostShaders::FXAA_FRAG, GL_FRAGMENT_SHADER); - - const auto replace_include = [](std::string& shader_source, std::string_view include_name, - std::string_view include_content) { - const std::string include_string = fmt::format("#include \"{}\"", include_name); - const std::size_t pos = shader_source.find(include_string); - ASSERT(pos != std::string::npos); - shader_source.replace(pos, include_string.size(), include_content); - }; - - const auto SmaaShader = [&](std::string_view specialized_source, GLenum stage) { - std::string shader_source{specialized_source}; - replace_include(shader_source, "opengl_smaa.glsl", HostShaders::OPENGL_SMAA_GLSL); - return CreateProgram(shader_source, stage); - }; - - smaa_edge_detection_vert = SmaaShader(HostShaders::SMAA_EDGE_DETECTION_VERT, GL_VERTEX_SHADER); - smaa_edge_detection_frag = - SmaaShader(HostShaders::SMAA_EDGE_DETECTION_FRAG, GL_FRAGMENT_SHADER); - smaa_blending_weight_calculation_vert = - SmaaShader(HostShaders::SMAA_BLENDING_WEIGHT_CALCULATION_VERT, GL_VERTEX_SHADER); - smaa_blending_weight_calculation_frag = - SmaaShader(HostShaders::SMAA_BLENDING_WEIGHT_CALCULATION_FRAG, GL_FRAGMENT_SHADER); - smaa_neighborhood_blending_vert = - SmaaShader(HostShaders::SMAA_NEIGHBORHOOD_BLENDING_VERT, GL_VERTEX_SHADER); - smaa_neighborhood_blending_frag = - SmaaShader(HostShaders::SMAA_NEIGHBORHOOD_BLENDING_FRAG, GL_FRAGMENT_SHADER); - - present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER); - present_bilinear_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); - present_bicubic_fragment = CreateProgram(HostShaders::PRESENT_BICUBIC_FRAG, GL_FRAGMENT_SHADER); - present_gaussian_fragment = - CreateProgram(HostShaders::PRESENT_GAUSSIAN_FRAG, GL_FRAGMENT_SHADER); - present_scaleforce_fragment = - CreateProgram(fmt::format("#version 460\n{}", HostShaders::OPENGL_PRESENT_SCALEFORCE_FRAG), - GL_FRAGMENT_SHADER); - - std::string fsr_source{HostShaders::OPENGL_FIDELITYFX_FSR_FRAG}; - replace_include(fsr_source, "ffx_a.h", HostShaders::FFX_A_H); - replace_include(fsr_source, "ffx_fsr1.h", HostShaders::FFX_FSR1_H); - - std::string fsr_easu_frag_source{HostShaders::OPENGL_FIDELITYFX_FSR_EASU_FRAG}; - std::string fsr_rcas_frag_source{HostShaders::OPENGL_FIDELITYFX_FSR_RCAS_FRAG}; - replace_include(fsr_easu_frag_source, "opengl_fidelityfx_fsr.frag", fsr_source); - replace_include(fsr_rcas_frag_source, "opengl_fidelityfx_fsr.frag", fsr_source); - - fsr = std::make_unique<FSR>(HostShaders::FULL_SCREEN_TRIANGLE_VERT, fsr_easu_frag_source, - fsr_rcas_frag_source); - - // Generate presentation sampler - present_sampler.Create(); - glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glSamplerParameteri(present_sampler.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); - - present_sampler_nn.Create(); - glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glSamplerParameteri(present_sampler_nn.handle, GL_TEXTURE_WRAP_R, GL_CLAMP_TO_EDGE); - - // Generate VBO handle for drawing - vertex_buffer.Create(); - - // Attach vertex data to VAO - glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW); - - // Allocate textures for the screen - screen_info.texture.resource.Create(GL_TEXTURE_2D); - - const GLuint texture = screen_info.texture.resource.handle; - glTextureStorage2D(texture, 1, GL_RGBA8, 1, 1); - - screen_info.display_texture = screen_info.texture.resource.handle; - - // Clear screen to black - LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture); - - aa_framebuffer.Create(); - - smaa_area_tex.Create(GL_TEXTURE_2D); - glTextureStorage2D(smaa_area_tex.handle, 1, GL_RG8, AREATEX_WIDTH, AREATEX_HEIGHT); - glTextureSubImage2D(smaa_area_tex.handle, 0, 0, 0, AREATEX_WIDTH, AREATEX_HEIGHT, GL_RG, - GL_UNSIGNED_BYTE, areaTexBytes); - smaa_search_tex.Create(GL_TEXTURE_2D); - glTextureStorage2D(smaa_search_tex.handle, 1, GL_R8, SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT); - glTextureSubImage2D(smaa_search_tex.handle, 0, 0, 0, SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT, GL_RED, - GL_UNSIGNED_BYTE, searchTexBytes); -} - void RendererOpenGL::AddTelemetryFields() { const char* const gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))}; const char* const gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))}; @@ -380,328 +159,7 @@ void RendererOpenGL::AddTelemetryFields() { telemetry_session.AddField(user_system, "GPU_OpenGL_Version", std::string(gl_version)); } -void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, - const Tegra::FramebufferConfig& framebuffer) { - texture.width = framebuffer.width; - texture.height = framebuffer.height; - texture.pixel_format = framebuffer.pixel_format; - - const auto pixel_format{ - VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)}; - const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)}; - gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel); - - GLint internal_format; - switch (framebuffer.pixel_format) { - case Service::android::PixelFormat::Rgba8888: - internal_format = GL_RGBA8; - texture.gl_format = GL_RGBA; - texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; - break; - case Service::android::PixelFormat::Rgb565: - internal_format = GL_RGB565; - texture.gl_format = GL_RGB; - texture.gl_type = GL_UNSIGNED_SHORT_5_6_5; - break; - default: - internal_format = GL_RGBA8; - texture.gl_format = GL_RGBA; - texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; - // UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", - // static_cast<u32>(framebuffer.pixel_format)); - break; - } - - texture.resource.Release(); - texture.resource.Create(GL_TEXTURE_2D); - glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height); - aa_texture.Release(); - aa_texture.Create(GL_TEXTURE_2D); - glTextureStorage2D(aa_texture.handle, 1, GL_RGBA16F, - Settings::values.resolution_info.ScaleUp(screen_info.texture.width), - Settings::values.resolution_info.ScaleUp(screen_info.texture.height)); - glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0, aa_texture.handle, 0); - smaa_edges_tex.Release(); - smaa_edges_tex.Create(GL_TEXTURE_2D); - glTextureStorage2D(smaa_edges_tex.handle, 1, GL_RG16F, - Settings::values.resolution_info.ScaleUp(screen_info.texture.width), - Settings::values.resolution_info.ScaleUp(screen_info.texture.height)); - smaa_blend_tex.Release(); - smaa_blend_tex.Create(GL_TEXTURE_2D); - glTextureStorage2D(smaa_blend_tex.handle, 1, GL_RGBA16F, - Settings::values.resolution_info.ScaleUp(screen_info.texture.width), - Settings::values.resolution_info.ScaleUp(screen_info.texture.height)); -} - -void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { - // TODO: Signal state tracker about these changes - state_tracker.NotifyScreenDrawVertexArray(); - state_tracker.NotifyPolygonModes(); - state_tracker.NotifyViewport0(); - state_tracker.NotifyScissor0(); - state_tracker.NotifyColorMask(0); - state_tracker.NotifyBlend0(); - state_tracker.NotifyFramebuffer(); - state_tracker.NotifyFrontFace(); - state_tracker.NotifyCullTest(); - state_tracker.NotifyDepthTest(); - state_tracker.NotifyStencilTest(); - state_tracker.NotifyPolygonOffset(); - state_tracker.NotifyRasterizeEnable(); - state_tracker.NotifyFramebufferSRGB(); - state_tracker.NotifyLogicOp(); - state_tracker.NotifyClipControl(); - state_tracker.NotifyAlphaTest(); - - state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); - - glEnable(GL_CULL_FACE); - glDisable(GL_COLOR_LOGIC_OP); - glDisable(GL_DEPTH_TEST); - glDisable(GL_STENCIL_TEST); - glDisable(GL_POLYGON_OFFSET_FILL); - glDisable(GL_RASTERIZER_DISCARD); - glDisable(GL_ALPHA_TEST); - glDisablei(GL_BLEND, 0); - glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); - glCullFace(GL_BACK); - glFrontFace(GL_CW); - glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - glDepthRangeIndexed(0, 0.0, 0.0); - - glBindTextureUnit(0, screen_info.display_texture); - - auto anti_aliasing = Settings::values.anti_aliasing.GetValue(); - if (anti_aliasing >= Settings::AntiAliasing::MaxEnum) { - LOG_ERROR(Render_OpenGL, "Invalid antialiasing option selected {}", anti_aliasing); - anti_aliasing = Settings::AntiAliasing::None; - Settings::values.anti_aliasing.SetValue(anti_aliasing); - } - - if (anti_aliasing != Settings::AntiAliasing::None) { - glEnablei(GL_SCISSOR_TEST, 0); - auto viewport_width = screen_info.texture.width; - auto scissor_width = framebuffer_crop_rect.GetWidth(); - if (scissor_width <= 0) { - scissor_width = viewport_width; - } - auto viewport_height = screen_info.texture.height; - auto scissor_height = framebuffer_crop_rect.GetHeight(); - if (scissor_height <= 0) { - scissor_height = viewport_height; - } - if (screen_info.was_accelerated) { - viewport_width = Settings::values.resolution_info.ScaleUp(viewport_width); - scissor_width = Settings::values.resolution_info.ScaleUp(scissor_width); - viewport_height = Settings::values.resolution_info.ScaleUp(viewport_height); - scissor_height = Settings::values.resolution_info.ScaleUp(scissor_height); - } - glScissorIndexed(0, 0, 0, scissor_width, scissor_height); - glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(viewport_width), - static_cast<GLfloat>(viewport_height)); - - glBindSampler(0, present_sampler.handle); - GLint old_read_fb; - GLint old_draw_fb; - glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb); - glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); - - switch (anti_aliasing) { - case Settings::AntiAliasing::Fxaa: { - program_manager.BindPresentPrograms(fxaa_vertex.handle, fxaa_fragment.handle); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, aa_framebuffer.handle); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - } break; - case Settings::AntiAliasing::Smaa: { - glClearColor(0, 0, 0, 0); - glFrontFace(GL_CCW); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, aa_framebuffer.handle); - glBindSampler(1, present_sampler.handle); - glBindSampler(2, present_sampler.handle); - - glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0, - smaa_edges_tex.handle, 0); - glClear(GL_COLOR_BUFFER_BIT); - program_manager.BindPresentPrograms(smaa_edge_detection_vert.handle, - smaa_edge_detection_frag.handle); - glDrawArrays(GL_TRIANGLES, 0, 3); - - glBindTextureUnit(0, smaa_edges_tex.handle); - glBindTextureUnit(1, smaa_area_tex.handle); - glBindTextureUnit(2, smaa_search_tex.handle); - glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0, - smaa_blend_tex.handle, 0); - glClear(GL_COLOR_BUFFER_BIT); - program_manager.BindPresentPrograms(smaa_blending_weight_calculation_vert.handle, - smaa_blending_weight_calculation_frag.handle); - glDrawArrays(GL_TRIANGLES, 0, 3); - - glBindTextureUnit(0, screen_info.display_texture); - glBindTextureUnit(1, smaa_blend_tex.handle); - glNamedFramebufferTexture(aa_framebuffer.handle, GL_COLOR_ATTACHMENT0, - aa_texture.handle, 0); - program_manager.BindPresentPrograms(smaa_neighborhood_blending_vert.handle, - smaa_neighborhood_blending_frag.handle); - glDrawArrays(GL_TRIANGLES, 0, 3); - glFrontFace(GL_CW); - } break; - default: - UNREACHABLE(); - } - - glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); - - glBindTextureUnit(0, aa_texture.handle); - } - glDisablei(GL_SCISSOR_TEST, 0); - - if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { - if (!fsr->AreBuffersInitialized()) { - fsr->InitBuffers(); - } - - auto crop_rect = framebuffer_crop_rect; - if (crop_rect.GetWidth() == 0) { - crop_rect.right = framebuffer_width; - } - if (crop_rect.GetHeight() == 0) { - crop_rect.bottom = framebuffer_height; - } - crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor); - const auto fsr_input_width = Settings::values.resolution_info.ScaleUp(framebuffer_width); - const auto fsr_input_height = Settings::values.resolution_info.ScaleUp(framebuffer_height); - glBindSampler(0, present_sampler.handle); - fsr->Draw(program_manager, layout.screen, fsr_input_width, fsr_input_height, crop_rect); - } else { - if (fsr->AreBuffersInitialized()) { - fsr->ReleaseBuffers(); - } - } - - const std::array ortho_matrix = - MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); - - const auto fragment_handle = [this]() { - switch (Settings::values.scaling_filter.GetValue()) { - case Settings::ScalingFilter::NearestNeighbor: - case Settings::ScalingFilter::Bilinear: - return present_bilinear_fragment.handle; - case Settings::ScalingFilter::Bicubic: - return present_bicubic_fragment.handle; - case Settings::ScalingFilter::Gaussian: - return present_gaussian_fragment.handle; - case Settings::ScalingFilter::ScaleForce: - return present_scaleforce_fragment.handle; - case Settings::ScalingFilter::Fsr: - return fsr->GetPresentFragmentProgram().handle; - default: - return present_bilinear_fragment.handle; - } - }(); - program_manager.BindPresentPrograms(present_vertex.handle, fragment_handle); - glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE, - ortho_matrix.data()); - - const auto& texcoords = screen_info.display_texcoords; - auto left = texcoords.left; - auto right = texcoords.right; - if (framebuffer_transform_flags != Service::android::BufferTransformFlags::Unset) { - if (framebuffer_transform_flags == Service::android::BufferTransformFlags::FlipV) { - // Flip the framebuffer vertically - left = texcoords.right; - right = texcoords.left; - } else { - // Other transformations are unsupported - LOG_CRITICAL(Render_OpenGL, "Unsupported framebuffer_transform_flags={}", - framebuffer_transform_flags); - UNIMPLEMENTED(); - } - } - - ASSERT_MSG(framebuffer_crop_rect.left == 0, "Unimplemented"); - - f32 left_start{}; - if (framebuffer_crop_rect.Top() > 0) { - left_start = static_cast<f32>(framebuffer_crop_rect.Top()) / - static_cast<f32>(framebuffer_crop_rect.Bottom()); - } - f32 scale_u = static_cast<f32>(framebuffer_width) / static_cast<f32>(screen_info.texture.width); - f32 scale_v = - static_cast<f32>(framebuffer_height) / static_cast<f32>(screen_info.texture.height); - - if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::Fsr) { - // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering - // (e.g. handheld mode) on a 1920x1080 framebuffer. - if (framebuffer_crop_rect.GetWidth() > 0) { - scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / - static_cast<f32>(screen_info.texture.width); - } - if (framebuffer_crop_rect.GetHeight() > 0) { - scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / - static_cast<f32>(screen_info.texture.height); - } - } - if (Settings::values.anti_aliasing.GetValue() == Settings::AntiAliasing::Fxaa && - !screen_info.was_accelerated) { - scale_u /= Settings::values.resolution_info.up_factor; - scale_v /= Settings::values.resolution_info.up_factor; - } - - const auto& screen = layout.screen; - const std::array vertices = { - ScreenRectVertex(screen.left, screen.top, texcoords.top * scale_u, - left_start + left * scale_v), - ScreenRectVertex(screen.right, screen.top, texcoords.bottom * scale_u, - left_start + left * scale_v), - ScreenRectVertex(screen.left, screen.bottom, texcoords.top * scale_u, - left_start + right * scale_v), - ScreenRectVertex(screen.right, screen.bottom, texcoords.bottom * scale_u, - left_start + right * scale_v), - }; - glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices)); - - glDisable(GL_FRAMEBUFFER_SRGB); - glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width), - static_cast<GLfloat>(layout.height)); - - glEnableVertexAttribArray(PositionLocation); - glEnableVertexAttribArray(TexCoordLocation); - glVertexAttribDivisor(PositionLocation, 0); - glVertexAttribDivisor(TexCoordLocation, 0); - glVertexAttribFormat(PositionLocation, 2, GL_FLOAT, GL_FALSE, - offsetof(ScreenRectVertex, position)); - glVertexAttribFormat(TexCoordLocation, 2, GL_FLOAT, GL_FALSE, - offsetof(ScreenRectVertex, tex_coord)); - glVertexAttribBinding(PositionLocation, 0); - glVertexAttribBinding(TexCoordLocation, 0); - if (device.HasVertexBufferUnifiedMemory()) { - glBindVertexBuffer(0, 0, 0, sizeof(ScreenRectVertex)); - glBufferAddressRangeNV(GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV, 0, vertex_buffer_address, - sizeof(vertices)); - } else { - glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); - } - - if (Settings::values.scaling_filter.GetValue() != Settings::ScalingFilter::NearestNeighbor) { - glBindSampler(0, present_sampler.handle); - } else { - glBindSampler(0, present_sampler_nn.handle); - } - - // Update background color before drawing - glClearColor(Settings::values.bg_red.GetValue() / 255.0f, - Settings::values.bg_green.GetValue() / 255.0f, - Settings::values.bg_blue.GetValue() / 255.0f, 1.0f); - - glClear(GL_COLOR_BUFFER_BIT); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - - // TODO - // program_manager.RestoreGuestPipeline(); -} - -void RendererOpenGL::RenderScreenshot() { +void RendererOpenGL::RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers) { if (!renderer_settings.screenshot_requested) { return; } @@ -723,7 +181,7 @@ void RendererOpenGL::RenderScreenshot() { glRenderbufferStorage(GL_RENDERBUFFER, GL_SRGB8, layout.width, layout.height); glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, renderbuffer); - DrawScreen(layout); + blit_screen->DrawScreen(framebuffers, layout); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); glPixelStorei(GL_PACK_ROW_LENGTH, 0); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index 18699610a..c4625c96e 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -10,7 +10,6 @@ #include "video_core/renderer_base.h" #include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_fsr.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h" @@ -25,37 +24,13 @@ namespace Core::Frontend { class EmuWindow; } -namespace Core::Memory { -class Memory; -} - -namespace Layout { -struct FramebufferLayout; -} - namespace Tegra { class GPU; } namespace OpenGL { -/// Structure used for storing information about the textures for the Switch screen -struct TextureInfo { - OGLTexture resource; - GLsizei width; - GLsizei height; - GLenum gl_format; - GLenum gl_type; - Service::android::PixelFormat pixel_format; -}; - -/// Structure used for storing information about the display target for the Switch screen -struct ScreenInfo { - GLuint display_texture{}; - bool was_accelerated = false; - const Common::Rectangle<float> display_texcoords{0.0f, 0.0f, 1.0f, 1.0f}; - TextureInfo texture; -}; +class BlitScreen; class RendererOpenGL final : public VideoCore::RendererBase { public: @@ -65,7 +40,7 @@ public: std::unique_ptr<Core::Frontend::GraphicsContext> context_); ~RendererOpenGL() override; - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; + void Composite(std::span<const Tegra::FramebufferConfig> framebuffers) override; VideoCore::RasterizerInterface* ReadRasterizer() override { return &rasterizer; @@ -76,28 +51,8 @@ public: } private: - /// Initializes the OpenGL state and creates persistent objects. - void InitOpenGLObjects(); - void AddTelemetryFields(); - - void ConfigureFramebufferTexture(TextureInfo& texture, - const Tegra::FramebufferConfig& framebuffer); - - /// Draws the emulated screens to the emulator window. - void DrawScreen(const Layout::FramebufferLayout& layout); - - void RenderScreenshot(); - - /// Loads framebuffer from emulated memory into the active OpenGL texture. - void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer); - - /// Fills active OpenGL texture with the given RGB color.Since the color is solid, the texture - /// can be 1x1 but will stretch across whatever it's rendered on. - void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, - const TextureInfo& texture); - - void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer); + void RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers); Core::TelemetrySession& telemetry_session; Core::Frontend::EmuWindow& emu_window; @@ -108,49 +63,9 @@ private: StateTracker state_tracker; ProgramManager program_manager; RasterizerOpenGL rasterizer; - - // OpenGL object IDs - OGLSampler present_sampler; - OGLSampler present_sampler_nn; - OGLBuffer vertex_buffer; - OGLProgram fxaa_vertex; - OGLProgram fxaa_fragment; - OGLProgram present_vertex; - OGLProgram present_bilinear_fragment; - OGLProgram present_bicubic_fragment; - OGLProgram present_gaussian_fragment; - OGLProgram present_scaleforce_fragment; OGLFramebuffer screenshot_framebuffer; - // GPU address of the vertex buffer - GLuint64EXT vertex_buffer_address = 0; - - /// Display information for Switch screen - ScreenInfo screen_info; - OGLTexture aa_texture; - OGLFramebuffer aa_framebuffer; - - OGLProgram smaa_edge_detection_vert; - OGLProgram smaa_blending_weight_calculation_vert; - OGLProgram smaa_neighborhood_blending_vert; - OGLProgram smaa_edge_detection_frag; - OGLProgram smaa_blending_weight_calculation_frag; - OGLProgram smaa_neighborhood_blending_frag; - OGLTexture smaa_area_tex; - OGLTexture smaa_search_tex; - OGLTexture smaa_edges_tex; - OGLTexture smaa_blend_tex; - - std::unique_ptr<FSR> fsr; - - /// OpenGL framebuffer data - std::vector<u8> gl_framebuffer_data; - - /// Used for transforming the framebuffer orientation - Service::android::BufferTransformFlags framebuffer_transform_flags{}; - Common::Rectangle<int> framebuffer_crop_rect; - u32 framebuffer_width; - u32 framebuffer_height; + std::unique_ptr<BlitScreen> blit_screen; }; } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/present/anti_alias_pass.h b/src/video_core/renderer_vulkan/present/anti_alias_pass.h new file mode 100644 index 000000000..1f20fbd7f --- /dev/null +++ b/src/video_core/renderer_vulkan/present/anti_alias_pass.h @@ -0,0 +1,25 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class Scheduler; + +class AntiAliasPass { +public: + virtual ~AntiAliasPass() = default; + virtual void Draw(Scheduler& scheduler, size_t image_index, VkImage* inout_image, + VkImageView* inout_image_view) = 0; +}; + +class NoAA final : public AntiAliasPass { +public: + void Draw(Scheduler& scheduler, size_t image_index, VkImage* inout_image, + VkImageView* inout_image_view) override {} +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/filters.cpp b/src/video_core/renderer_vulkan/present/filters.cpp new file mode 100644 index 000000000..b5e08938e --- /dev/null +++ b/src/video_core/renderer_vulkan/present/filters.cpp @@ -0,0 +1,56 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/common_types.h" + +#include "video_core/host_shaders/present_bicubic_frag_spv.h" +#include "video_core/host_shaders/present_gaussian_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_scaleforce_fp16_frag_spv.h" +#include "video_core/host_shaders/vulkan_present_scaleforce_fp32_frag_spv.h" +#include "video_core/renderer_vulkan/present/filters.h" +#include "video_core/renderer_vulkan/present/util.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/vulkan_common/vulkan_device.h" + +namespace Vulkan { + +namespace { + +vk::ShaderModule SelectScaleForceShader(const Device& device) { + if (device.IsFloat16Supported()) { + return BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP16_FRAG_SPV); + } else { + return BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP32_FRAG_SPV); + } +} + +} // Anonymous namespace + +std::unique_ptr<WindowAdaptPass> MakeNearestNeighbor(const Device& device, VkFormat frame_format) { + return std::make_unique<WindowAdaptPass>(device, frame_format, + CreateNearestNeighborSampler(device), + BuildShader(device, VULKAN_PRESENT_FRAG_SPV)); +} + +std::unique_ptr<WindowAdaptPass> MakeBilinear(const Device& device, VkFormat frame_format) { + return std::make_unique<WindowAdaptPass>(device, frame_format, CreateBilinearSampler(device), + BuildShader(device, VULKAN_PRESENT_FRAG_SPV)); +} + +std::unique_ptr<WindowAdaptPass> MakeBicubic(const Device& device, VkFormat frame_format) { + return std::make_unique<WindowAdaptPass>(device, frame_format, CreateBilinearSampler(device), + BuildShader(device, PRESENT_BICUBIC_FRAG_SPV)); +} + +std::unique_ptr<WindowAdaptPass> MakeGaussian(const Device& device, VkFormat frame_format) { + return std::make_unique<WindowAdaptPass>(device, frame_format, CreateBilinearSampler(device), + BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV)); +} + +std::unique_ptr<WindowAdaptPass> MakeScaleForce(const Device& device, VkFormat frame_format) { + return std::make_unique<WindowAdaptPass>(device, frame_format, CreateBilinearSampler(device), + SelectScaleForceShader(device)); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/filters.h b/src/video_core/renderer_vulkan/present/filters.h new file mode 100644 index 000000000..6c83726dd --- /dev/null +++ b/src/video_core/renderer_vulkan/present/filters.h @@ -0,0 +1,18 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "video_core/renderer_vulkan/present/window_adapt_pass.h" + +namespace Vulkan { + +class MemoryAllocator; + +std::unique_ptr<WindowAdaptPass> MakeNearestNeighbor(const Device& device, VkFormat frame_format); +std::unique_ptr<WindowAdaptPass> MakeBilinear(const Device& device, VkFormat frame_format); +std::unique_ptr<WindowAdaptPass> MakeBicubic(const Device& device, VkFormat frame_format); +std::unique_ptr<WindowAdaptPass> MakeGaussian(const Device& device, VkFormat frame_format); +std::unique_ptr<WindowAdaptPass> MakeScaleForce(const Device& device, VkFormat frame_format); + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/fsr.cpp b/src/video_core/renderer_vulkan/present/fsr.cpp new file mode 100644 index 000000000..3f708be70 --- /dev/null +++ b/src/video_core/renderer_vulkan/present/fsr.cpp @@ -0,0 +1,226 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "common/settings.h" + +#include "video_core/fsr.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_frag_spv.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_frag_spv.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_frag_spv.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32_frag_spv.h" +#include "video_core/host_shaders/vulkan_fidelityfx_fsr_vert_spv.h" +#include "video_core/renderer_vulkan/present/fsr.h" +#include "video_core/renderer_vulkan/present/util.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/vulkan_common/vulkan_device.h" + +namespace Vulkan { +using namespace FSR; + +using PushConstants = std::array<u32, 4 * 4>; + +FSR::FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, + VkExtent2D extent) + : m_device{device}, m_memory_allocator{memory_allocator}, + m_image_count{image_count}, m_extent{extent} { + + CreateImages(); + CreateRenderPasses(); + CreateSampler(); + CreateShaders(); + CreateDescriptorPool(); + CreateDescriptorSetLayout(); + CreateDescriptorSets(); + CreatePipelineLayouts(); + CreatePipelines(); +} + +void FSR::CreateImages() { + m_dynamic_images.resize(m_image_count); + for (auto& images : m_dynamic_images) { + images.images[Easu] = + CreateWrappedImage(m_memory_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); + images.images[Rcas] = + CreateWrappedImage(m_memory_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); + images.image_views[Easu] = + CreateWrappedImageView(m_device, images.images[Easu], VK_FORMAT_R16G16B16A16_SFLOAT); + images.image_views[Rcas] = + CreateWrappedImageView(m_device, images.images[Rcas], VK_FORMAT_R16G16B16A16_SFLOAT); + } +} + +void FSR::CreateRenderPasses() { + m_renderpass = CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16B16A16_SFLOAT); + + for (auto& images : m_dynamic_images) { + images.framebuffers[Easu] = + CreateWrappedFramebuffer(m_device, m_renderpass, images.image_views[Easu], m_extent); + images.framebuffers[Rcas] = + CreateWrappedFramebuffer(m_device, m_renderpass, images.image_views[Rcas], m_extent); + } +} + +void FSR::CreateSampler() { + m_sampler = CreateBilinearSampler(m_device); +} + +void FSR::CreateShaders() { + m_vert_shader = BuildShader(m_device, VULKAN_FIDELITYFX_FSR_VERT_SPV); + + if (m_device.IsFloat16Supported()) { + m_easu_shader = BuildShader(m_device, VULKAN_FIDELITYFX_FSR_EASU_FP16_FRAG_SPV); + m_rcas_shader = BuildShader(m_device, VULKAN_FIDELITYFX_FSR_RCAS_FP16_FRAG_SPV); + } else { + m_easu_shader = BuildShader(m_device, VULKAN_FIDELITYFX_FSR_EASU_FP32_FRAG_SPV); + m_rcas_shader = BuildShader(m_device, VULKAN_FIDELITYFX_FSR_RCAS_FP32_FRAG_SPV); + } +} + +void FSR::CreateDescriptorPool() { + // EASU: 1 descriptor + // RCAS: 1 descriptor + // 2 descriptors, 2 descriptor sets per invocation + m_descriptor_pool = CreateWrappedDescriptorPool(m_device, 2 * m_image_count, 2 * m_image_count); +} + +void FSR::CreateDescriptorSetLayout() { + m_descriptor_set_layout = + CreateWrappedDescriptorSetLayout(m_device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER}); +} + +void FSR::CreateDescriptorSets() { + std::vector<VkDescriptorSetLayout> layouts(MaxFsrStage, *m_descriptor_set_layout); + + for (auto& images : m_dynamic_images) { + images.descriptor_sets = CreateWrappedDescriptorSets(m_descriptor_pool, layouts); + } +} + +void FSR::CreatePipelineLayouts() { + const VkPushConstantRange range{ + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + .offset = 0, + .size = sizeof(PushConstants), + }; + VkPipelineLayoutCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = m_descriptor_set_layout.address(), + .pushConstantRangeCount = 1, + .pPushConstantRanges = &range, + }; + + m_pipeline_layout = m_device.GetLogical().CreatePipelineLayout(ci); +} + +void FSR::CreatePipelines() { + m_easu_pipeline = CreateWrappedPipeline(m_device, m_renderpass, m_pipeline_layout, + std::tie(m_vert_shader, m_easu_shader)); + m_rcas_pipeline = CreateWrappedPipeline(m_device, m_renderpass, m_pipeline_layout, + std::tie(m_vert_shader, m_rcas_shader)); +} + +void FSR::UpdateDescriptorSets(VkImageView image_view, size_t image_index) { + Images& images = m_dynamic_images[image_index]; + std::vector<VkDescriptorImageInfo> image_infos; + std::vector<VkWriteDescriptorSet> updates; + image_infos.reserve(2); + + updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, image_view, + images.descriptor_sets[Easu], 0)); + updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, *images.image_views[Easu], + images.descriptor_sets[Rcas], 0)); + + m_device.GetLogical().UpdateDescriptorSets(updates, {}); +} + +void FSR::UploadImages(Scheduler& scheduler) { + if (m_images_ready) { + return; + } + + scheduler.Record([&](vk::CommandBuffer cmdbuf) { + for (auto& image : m_dynamic_images) { + ClearColorImage(cmdbuf, *image.images[Easu]); + ClearColorImage(cmdbuf, *image.images[Rcas]); + } + }); + scheduler.Finish(); + + m_images_ready = true; +} + +VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImage source_image, + VkImageView source_image_view, VkExtent2D input_image_extent, + const Common::Rectangle<f32>& crop_rect) { + Images& images = m_dynamic_images[image_index]; + + VkImage easu_image = *images.images[Easu]; + VkImage rcas_image = *images.images[Rcas]; + VkDescriptorSet easu_descriptor_set = images.descriptor_sets[Easu]; + VkDescriptorSet rcas_descriptor_set = images.descriptor_sets[Rcas]; + VkFramebuffer easu_framebuffer = *images.framebuffers[Easu]; + VkFramebuffer rcas_framebuffer = *images.framebuffers[Rcas]; + VkPipeline easu_pipeline = *m_easu_pipeline; + VkPipeline rcas_pipeline = *m_rcas_pipeline; + VkPipelineLayout pipeline_layout = *m_pipeline_layout; + VkRenderPass renderpass = *m_renderpass; + VkExtent2D extent = m_extent; + + const f32 input_image_width = static_cast<f32>(input_image_extent.width); + const f32 input_image_height = static_cast<f32>(input_image_extent.height); + const f32 output_image_width = static_cast<f32>(extent.width); + const f32 output_image_height = static_cast<f32>(extent.height); + const f32 viewport_width = (crop_rect.right - crop_rect.left) * input_image_width; + const f32 viewport_x = crop_rect.left * input_image_width; + const f32 viewport_height = (crop_rect.bottom - crop_rect.top) * input_image_height; + const f32 viewport_y = crop_rect.top * input_image_height; + + PushConstants easu_con{}; + PushConstants rcas_con{}; + FsrEasuConOffset(easu_con.data() + 0, easu_con.data() + 4, easu_con.data() + 8, + easu_con.data() + 12, viewport_width, viewport_height, input_image_width, + input_image_height, output_image_width, output_image_height, viewport_x, + viewport_y); + + const float sharpening = + static_cast<float>(Settings::values.fsr_sharpening_slider.GetValue()) / 100.0f; + FsrRcasCon(rcas_con.data(), sharpening); + + UploadImages(scheduler); + UpdateDescriptorSets(source_image_view, image_index); + + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([=](vk::CommandBuffer cmdbuf) { + TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL); + TransitionImageLayout(cmdbuf, easu_image, VK_IMAGE_LAYOUT_GENERAL); + BeginRenderPass(cmdbuf, renderpass, easu_framebuffer, extent); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, easu_pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, + easu_descriptor_set, {}); + cmdbuf.PushConstants(pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, easu_con); + cmdbuf.Draw(3, 1, 0, 0); + cmdbuf.EndRenderPass(); + + TransitionImageLayout(cmdbuf, easu_image, VK_IMAGE_LAYOUT_GENERAL); + TransitionImageLayout(cmdbuf, rcas_image, VK_IMAGE_LAYOUT_GENERAL); + BeginRenderPass(cmdbuf, renderpass, rcas_framebuffer, extent); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, rcas_pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, + rcas_descriptor_set, {}); + cmdbuf.PushConstants(pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, rcas_con); + cmdbuf.Draw(3, 1, 0, 0); + cmdbuf.EndRenderPass(); + + TransitionImageLayout(cmdbuf, rcas_image, VK_IMAGE_LAYOUT_GENERAL); + }); + + return *images.image_views[Rcas]; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/fsr.h b/src/video_core/renderer_vulkan/present/fsr.h new file mode 100644 index 000000000..8602e8146 --- /dev/null +++ b/src/video_core/renderer_vulkan/present/fsr.h @@ -0,0 +1,69 @@ +// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/math_util.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class Device; +class Scheduler; + +class FSR { +public: + explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, + VkExtent2D extent); + VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImage source_image, + VkImageView source_image_view, VkExtent2D input_image_extent, + const Common::Rectangle<f32>& crop_rect); + +private: + void CreateImages(); + void CreateRenderPasses(); + void CreateSampler(); + void CreateShaders(); + void CreateDescriptorPool(); + void CreateDescriptorSetLayout(); + void CreateDescriptorSets(); + void CreatePipelineLayouts(); + void CreatePipelines(); + + void UploadImages(Scheduler& scheduler); + void UpdateDescriptorSets(VkImageView image_view, size_t image_index); + + const Device& m_device; + MemoryAllocator& m_memory_allocator; + const size_t m_image_count; + const VkExtent2D m_extent; + + enum FsrStage { + Easu, + Rcas, + MaxFsrStage, + }; + + vk::DescriptorPool m_descriptor_pool; + vk::DescriptorSetLayout m_descriptor_set_layout; + vk::PipelineLayout m_pipeline_layout; + vk::ShaderModule m_vert_shader; + vk::ShaderModule m_easu_shader; + vk::ShaderModule m_rcas_shader; + vk::Pipeline m_easu_pipeline; + vk::Pipeline m_rcas_pipeline; + vk::RenderPass m_renderpass; + vk::Sampler m_sampler; + + struct Images { + vk::DescriptorSets descriptor_sets; + std::array<vk::Image, MaxFsrStage> images; + std::array<vk::ImageView, MaxFsrStage> image_views; + std::array<vk::Framebuffer, MaxFsrStage> framebuffers; + }; + std::vector<Images> m_dynamic_images; + bool m_images_ready{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/fxaa.cpp b/src/video_core/renderer_vulkan/present/fxaa.cpp new file mode 100644 index 000000000..bdafd1f4d --- /dev/null +++ b/src/video_core/renderer_vulkan/present/fxaa.cpp @@ -0,0 +1,148 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "common/common_types.h" + +#include "video_core/host_shaders/fxaa_frag_spv.h" +#include "video_core/host_shaders/fxaa_vert_spv.h" +#include "video_core/renderer_vulkan/present/fxaa.h" +#include "video_core/renderer_vulkan/present/util.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/vulkan_common/vulkan_device.h" + +namespace Vulkan { + +FXAA::FXAA(const Device& device, MemoryAllocator& allocator, size_t image_count, VkExtent2D extent) + : m_device(device), m_allocator(allocator), m_extent(extent), + m_image_count(static_cast<u32>(image_count)) { + CreateImages(); + CreateRenderPasses(); + CreateSampler(); + CreateShaders(); + CreateDescriptorPool(); + CreateDescriptorSetLayouts(); + CreateDescriptorSets(); + CreatePipelineLayouts(); + CreatePipelines(); +} + +FXAA::~FXAA() = default; + +void FXAA::CreateImages() { + for (u32 i = 0; i < m_image_count; i++) { + Image& image = m_dynamic_images.emplace_back(); + + image.image = CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); + image.image_view = + CreateWrappedImageView(m_device, image.image, VK_FORMAT_R16G16B16A16_SFLOAT); + } +} + +void FXAA::CreateRenderPasses() { + m_renderpass = CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16B16A16_SFLOAT); + + for (auto& image : m_dynamic_images) { + image.framebuffer = + CreateWrappedFramebuffer(m_device, m_renderpass, image.image_view, m_extent); + } +} + +void FXAA::CreateSampler() { + m_sampler = CreateWrappedSampler(m_device); +} + +void FXAA::CreateShaders() { + m_vertex_shader = CreateWrappedShaderModule(m_device, FXAA_VERT_SPV); + m_fragment_shader = CreateWrappedShaderModule(m_device, FXAA_FRAG_SPV); +} + +void FXAA::CreateDescriptorPool() { + // 2 descriptors, 1 descriptor set per image + m_descriptor_pool = CreateWrappedDescriptorPool(m_device, 2 * m_image_count, m_image_count); +} + +void FXAA::CreateDescriptorSetLayouts() { + m_descriptor_set_layout = + CreateWrappedDescriptorSetLayout(m_device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER}); +} + +void FXAA::CreateDescriptorSets() { + VkDescriptorSetLayout layout = *m_descriptor_set_layout; + + for (auto& images : m_dynamic_images) { + images.descriptor_sets = CreateWrappedDescriptorSets(m_descriptor_pool, {layout}); + } +} + +void FXAA::CreatePipelineLayouts() { + m_pipeline_layout = CreateWrappedPipelineLayout(m_device, m_descriptor_set_layout); +} + +void FXAA::CreatePipelines() { + m_pipeline = CreateWrappedPipeline(m_device, m_renderpass, m_pipeline_layout, + std::tie(m_vertex_shader, m_fragment_shader)); +} + +void FXAA::UpdateDescriptorSets(VkImageView image_view, size_t image_index) { + Image& image = m_dynamic_images[image_index]; + std::vector<VkDescriptorImageInfo> image_infos; + std::vector<VkWriteDescriptorSet> updates; + image_infos.reserve(2); + + updates.push_back( + CreateWriteDescriptorSet(image_infos, *m_sampler, image_view, image.descriptor_sets[0], 0)); + updates.push_back( + CreateWriteDescriptorSet(image_infos, *m_sampler, image_view, image.descriptor_sets[0], 1)); + + m_device.GetLogical().UpdateDescriptorSets(updates, {}); +} + +void FXAA::UploadImages(Scheduler& scheduler) { + if (m_images_ready) { + return; + } + + scheduler.Record([&](vk::CommandBuffer cmdbuf) { + for (auto& image : m_dynamic_images) { + ClearColorImage(cmdbuf, *image.image); + } + }); + scheduler.Finish(); + + m_images_ready = true; +} + +void FXAA::Draw(Scheduler& scheduler, size_t image_index, VkImage* inout_image, + VkImageView* inout_image_view) { + const Image& image{m_dynamic_images[image_index]}; + const VkImage input_image{*inout_image}; + const VkImage output_image{*image.image}; + const VkDescriptorSet descriptor_set{image.descriptor_sets[0]}; + const VkFramebuffer framebuffer{*image.framebuffer}; + const VkRenderPass renderpass{*m_renderpass}; + const VkPipeline pipeline{*m_pipeline}; + const VkPipelineLayout layout{*m_pipeline_layout}; + const VkExtent2D extent{m_extent}; + + UploadImages(scheduler); + UpdateDescriptorSets(*inout_image_view, image_index); + + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([=](vk::CommandBuffer cmdbuf) { + TransitionImageLayout(cmdbuf, input_image, VK_IMAGE_LAYOUT_GENERAL); + TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL); + BeginRenderPass(cmdbuf, renderpass, framebuffer, extent); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, {}); + cmdbuf.Draw(3, 1, 0, 0); + cmdbuf.EndRenderPass(); + TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL); + }); + + *inout_image = *image.image; + *inout_image_view = *image.image_view; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/fxaa.h b/src/video_core/renderer_vulkan/present/fxaa.h new file mode 100644 index 000000000..97a2e5c1c --- /dev/null +++ b/src/video_core/renderer_vulkan/present/fxaa.h @@ -0,0 +1,63 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "video_core/renderer_vulkan/present/anti_alias_pass.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +class Device; +class Scheduler; +class StagingBufferPool; + +class FXAA final : public AntiAliasPass { +public: + explicit FXAA(const Device& device, MemoryAllocator& allocator, size_t image_count, + VkExtent2D extent); + ~FXAA() override; + + void Draw(Scheduler& scheduler, size_t image_index, VkImage* inout_image, + VkImageView* inout_image_view) override; + +private: + void CreateImages(); + void CreateRenderPasses(); + void CreateSampler(); + void CreateShaders(); + void CreateDescriptorPool(); + void CreateDescriptorSetLayouts(); + void CreateDescriptorSets(); + void CreatePipelineLayouts(); + void CreatePipelines(); + void UpdateDescriptorSets(VkImageView image_view, size_t image_index); + void UploadImages(Scheduler& scheduler); + + const Device& m_device; + MemoryAllocator& m_allocator; + const VkExtent2D m_extent; + const u32 m_image_count; + + vk::ShaderModule m_vertex_shader{}; + vk::ShaderModule m_fragment_shader{}; + vk::DescriptorPool m_descriptor_pool{}; + vk::DescriptorSetLayout m_descriptor_set_layout{}; + vk::PipelineLayout m_pipeline_layout{}; + vk::Pipeline m_pipeline{}; + vk::RenderPass m_renderpass{}; + + struct Image { + vk::DescriptorSets descriptor_sets{}; + vk::Framebuffer framebuffer{}; + vk::Image image{}; + vk::ImageView image_view{}; + }; + std::vector<Image> m_dynamic_images{}; + bool m_images_ready{}; + + vk::Sampler m_sampler{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/layer.cpp b/src/video_core/renderer_vulkan/present/layer.cpp new file mode 100644 index 000000000..cfc04be44 --- /dev/null +++ b/src/video_core/renderer_vulkan/present/layer.cpp @@ -0,0 +1,336 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "video_core/renderer_vulkan/vk_rasterizer.h" + +#include "common/settings.h" +#include "video_core/framebuffer_config.h" +#include "video_core/renderer_vulkan/present/fsr.h" +#include "video_core/renderer_vulkan/present/fxaa.h" +#include "video_core/renderer_vulkan/present/layer.h" +#include "video_core/renderer_vulkan/present/present_push_constants.h" +#include "video_core/renderer_vulkan/present/smaa.h" +#include "video_core/renderer_vulkan/present/util.h" +#include "video_core/renderer_vulkan/vk_blit_screen.h" +#include "video_core/textures/decoders.h" + +namespace Vulkan { + +namespace { + +u32 GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) { + using namespace VideoCore::Surface; + return BytesPerBlock(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)); +} + +std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { + return static_cast<std::size_t>(framebuffer.stride) * + static_cast<std::size_t>(framebuffer.height) * GetBytesPerPixel(framebuffer); +} + +VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) { + switch (framebuffer.pixel_format) { + case Service::android::PixelFormat::Rgba8888: + case Service::android::PixelFormat::Rgbx8888: + return VK_FORMAT_A8B8G8R8_UNORM_PACK32; + case Service::android::PixelFormat::Rgb565: + return VK_FORMAT_R5G6B5_UNORM_PACK16; + case Service::android::PixelFormat::Bgra8888: + return VK_FORMAT_B8G8R8A8_UNORM; + default: + UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", + static_cast<u32>(framebuffer.pixel_format)); + return VK_FORMAT_A8B8G8R8_UNORM_PACK32; + } +} + +} // Anonymous namespace + +Layer::Layer(const Device& device_, MemoryAllocator& memory_allocator_, Scheduler& scheduler_, + Tegra::MaxwellDeviceMemoryManager& device_memory_, size_t image_count_, + VkExtent2D output_size, VkDescriptorSetLayout layout) + : device(device_), memory_allocator(memory_allocator_), scheduler(scheduler_), + device_memory(device_memory_), image_count(image_count_) { + CreateDescriptorPool(); + CreateDescriptorSets(layout); + if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { + CreateFSR(output_size); + } +} + +Layer::~Layer() { + ReleaseRawImages(); +} + +void Layer::ConfigureDraw(PresentPushConstants* out_push_constants, + VkDescriptorSet* out_descriptor_set, RasterizerVulkan& rasterizer, + VkSampler sampler, size_t image_index, + const Tegra::FramebufferConfig& framebuffer, + const Layout::FramebufferLayout& layout) { + const auto texture_info = rasterizer.AccelerateDisplay( + framebuffer, framebuffer.address + framebuffer.offset, framebuffer.stride); + const u32 texture_width = texture_info ? texture_info->width : framebuffer.width; + const u32 texture_height = texture_info ? texture_info->height : framebuffer.height; + const u32 scaled_width = texture_info ? texture_info->scaled_width : texture_width; + const u32 scaled_height = texture_info ? texture_info->scaled_height : texture_height; + const bool use_accelerated = texture_info.has_value(); + + RefreshResources(framebuffer); + SetAntiAliasPass(); + + // Finish any pending renderpass + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Wait(resource_ticks[image_index]); + SCOPE_EXIT({ resource_ticks[image_index] = scheduler.CurrentTick(); }); + + if (!use_accelerated) { + UpdateRawImage(framebuffer, image_index); + } + + VkImage source_image = texture_info ? texture_info->image : *raw_images[image_index]; + VkImageView source_image_view = + texture_info ? texture_info->image_view : *raw_image_views[image_index]; + + anti_alias->Draw(scheduler, image_index, &source_image, &source_image_view); + + auto crop_rect = Tegra::NormalizeCrop(framebuffer, texture_width, texture_height); + const VkExtent2D render_extent{ + .width = scaled_width, + .height = scaled_height, + }; + + if (fsr) { + source_image_view = fsr->Draw(scheduler, image_index, source_image, source_image_view, + render_extent, crop_rect); + crop_rect = {0, 0, 1, 1}; + } + + SetMatrixData(*out_push_constants, layout); + SetVertexData(*out_push_constants, layout, crop_rect); + + UpdateDescriptorSet(source_image_view, sampler, image_index); + *out_descriptor_set = descriptor_sets[image_index]; +} + +void Layer::CreateDescriptorPool() { + descriptor_pool = CreateWrappedDescriptorPool(device, image_count, image_count); +} + +void Layer::CreateDescriptorSets(VkDescriptorSetLayout layout) { + const std::vector layouts(image_count, layout); + descriptor_sets = CreateWrappedDescriptorSets(descriptor_pool, layouts); +} + +void Layer::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) { + const VkBufferCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = CalculateBufferSize(framebuffer), + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }; + + buffer = memory_allocator.CreateBuffer(ci, MemoryUsage::Upload); +} + +void Layer::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { + const auto format = GetFormat(framebuffer); + resource_ticks.resize(image_count); + raw_images.resize(image_count); + raw_image_views.resize(image_count); + + for (size_t i = 0; i < image_count; ++i) { + raw_images[i] = + CreateWrappedImage(memory_allocator, {framebuffer.width, framebuffer.height}, format); + raw_image_views[i] = CreateWrappedImageView(device, raw_images[i], format); + } +} + +void Layer::CreateFSR(VkExtent2D output_size) { + fsr = std::make_unique<FSR>(device, memory_allocator, image_count, output_size); +} + +void Layer::RefreshResources(const Tegra::FramebufferConfig& framebuffer) { + if (framebuffer.width == raw_width && framebuffer.height == raw_height && + framebuffer.pixel_format == pixel_format && !raw_images.empty()) { + return; + } + + raw_width = framebuffer.width; + raw_height = framebuffer.height; + pixel_format = framebuffer.pixel_format; + anti_alias.reset(); + + ReleaseRawImages(); + CreateStagingBuffer(framebuffer); + CreateRawImages(framebuffer); +} + +void Layer::SetAntiAliasPass() { + if (anti_alias && anti_alias_setting == Settings::values.anti_aliasing.GetValue()) { + return; + } + + anti_alias_setting = Settings::values.anti_aliasing.GetValue(); + + const VkExtent2D render_area{ + .width = Settings::values.resolution_info.ScaleUp(raw_width), + .height = Settings::values.resolution_info.ScaleUp(raw_height), + }; + + switch (anti_alias_setting) { + case Settings::AntiAliasing::Fxaa: + anti_alias = std::make_unique<FXAA>(device, memory_allocator, image_count, render_area); + break; + case Settings::AntiAliasing::Smaa: + anti_alias = std::make_unique<SMAA>(device, memory_allocator, image_count, render_area); + break; + default: + anti_alias = std::make_unique<NoAA>(); + break; + } +} + +void Layer::ReleaseRawImages() { + for (const u64 tick : resource_ticks) { + scheduler.Wait(tick); + } + raw_images.clear(); + buffer.reset(); +} + +u64 Layer::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const { + return GetSizeInBytes(framebuffer) * image_count; +} + +u64 Layer::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, + size_t image_index) const { + return GetSizeInBytes(framebuffer) * image_index; +} + +void Layer::SetMatrixData(PresentPushConstants& data, + const Layout::FramebufferLayout& layout) const { + data.modelview_matrix = + MakeOrthographicMatrix(static_cast<f32>(layout.width), static_cast<f32>(layout.height)); +} + +void Layer::SetVertexData(PresentPushConstants& data, const Layout::FramebufferLayout& layout, + const Common::Rectangle<f32>& crop) const { + // Map the coordinates to the screen. + const auto& screen = layout.screen; + const auto x = static_cast<f32>(screen.left); + const auto y = static_cast<f32>(screen.top); + const auto w = static_cast<f32>(screen.GetWidth()); + const auto h = static_cast<f32>(screen.GetHeight()); + + data.vertices[0] = ScreenRectVertex(x, y, crop.left, crop.top); + data.vertices[1] = ScreenRectVertex(x + w, y, crop.right, crop.top); + data.vertices[2] = ScreenRectVertex(x, y + h, crop.left, crop.bottom); + data.vertices[3] = ScreenRectVertex(x + w, y + h, crop.right, crop.bottom); +} + +void Layer::UpdateDescriptorSet(VkImageView image_view, VkSampler sampler, size_t image_index) { + const VkDescriptorImageInfo image_info{ + .sampler = sampler, + .imageView = image_view, + .imageLayout = VK_IMAGE_LAYOUT_GENERAL, + }; + + const VkWriteDescriptorSet sampler_write{ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .pNext = nullptr, + .dstSet = descriptor_sets[image_index], + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .pImageInfo = &image_info, + .pBufferInfo = nullptr, + .pTexelBufferView = nullptr, + }; + + device.GetLogical().UpdateDescriptorSets(std::array{sampler_write}, {}); +} + +void Layer::UpdateRawImage(const Tegra::FramebufferConfig& framebuffer, size_t image_index) { + const std::span<u8> mapped_span = buffer.Mapped(); + + const u64 image_offset = GetRawImageOffset(framebuffer, image_index); + + const DAddr framebuffer_addr = framebuffer.address + framebuffer.offset; + const u8* const host_ptr = device_memory.GetPointer<u8>(framebuffer_addr); + + // TODO(Rodrigo): Read this from HLE + constexpr u32 block_height_log2 = 4; + const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); + const u64 linear_size{GetSizeInBytes(framebuffer)}; + const u64 tiled_size{Tegra::Texture::CalculateSize( + true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)}; + Tegra::Texture::UnswizzleTexture( + mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size), + bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); + + const VkBufferImageCopy copy{ + .bufferOffset = image_offset, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource = + { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .imageOffset = {.x = 0, .y = 0, .z = 0}, + .imageExtent = + { + .width = framebuffer.width, + .height = framebuffer.height, + .depth = 1, + }, + }; + scheduler.Record([this, copy, index = image_index](vk::CommandBuffer cmdbuf) { + const VkImage image = *raw_images[index]; + const VkImageMemoryBarrier base_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = 0, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }, + }; + VkImageMemoryBarrier read_barrier = base_barrier; + read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + read_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + VkImageMemoryBarrier write_barrier = base_barrier; + write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + write_barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + read_barrier); + cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + 0, write_barrier); + }); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/layer.h b/src/video_core/renderer_vulkan/present/layer.h new file mode 100644 index 000000000..88d43fc5f --- /dev/null +++ b/src/video_core/renderer_vulkan/present/layer.h @@ -0,0 +1,92 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/math_util.h" +#include "video_core/host1x/gpu_device_memory_manager.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Layout { +struct FramebufferLayout; +} + +namespace Tegra { +struct FramebufferConfig; +} + +namespace Service::android { +enum class PixelFormat : u32; +} + +namespace Settings { +enum class AntiAliasing : u32; +} + +namespace Vulkan { + +class AntiAliasPass; +class Device; +class FSR; +class MemoryAllocator; +struct PresentPushConstants; +class RasterizerVulkan; +class Scheduler; + +class Layer final { +public: + explicit Layer(const Device& device, MemoryAllocator& memory_allocator, Scheduler& scheduler, + Tegra::MaxwellDeviceMemoryManager& device_memory, size_t image_count, + VkExtent2D output_size, VkDescriptorSetLayout layout); + ~Layer(); + + void ConfigureDraw(PresentPushConstants* out_push_constants, + VkDescriptorSet* out_descriptor_set, RasterizerVulkan& rasterizer, + VkSampler sampler, size_t image_index, + const Tegra::FramebufferConfig& framebuffer, + const Layout::FramebufferLayout& layout); + +private: + void CreateDescriptorPool(); + void CreateDescriptorSets(VkDescriptorSetLayout layout); + void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); + void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); + void CreateFSR(VkExtent2D output_size); + + void RefreshResources(const Tegra::FramebufferConfig& framebuffer); + void SetAntiAliasPass(); + void ReleaseRawImages(); + + u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; + u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, size_t image_index) const; + + void SetMatrixData(PresentPushConstants& data, const Layout::FramebufferLayout& layout) const; + void SetVertexData(PresentPushConstants& data, const Layout::FramebufferLayout& layout, + const Common::Rectangle<f32>& crop) const; + void UpdateDescriptorSet(VkImageView image_view, VkSampler sampler, size_t image_index); + void UpdateRawImage(const Tegra::FramebufferConfig& framebuffer, size_t image_index); + +private: + const Device& device; + MemoryAllocator& memory_allocator; + Scheduler& scheduler; + Tegra::MaxwellDeviceMemoryManager& device_memory; + const size_t image_count{}; + vk::DescriptorPool descriptor_pool{}; + vk::DescriptorSets descriptor_sets{}; + + vk::Buffer buffer{}; + std::vector<vk::Image> raw_images{}; + std::vector<vk::ImageView> raw_image_views{}; + u32 raw_width{}; + u32 raw_height{}; + Service::android::PixelFormat pixel_format{}; + + Settings::AntiAliasing anti_alias_setting{}; + std::unique_ptr<AntiAliasPass> anti_alias{}; + + std::unique_ptr<FSR> fsr{}; + std::vector<u64> resource_ticks{}; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/present_push_constants.h b/src/video_core/renderer_vulkan/present/present_push_constants.h new file mode 100644 index 000000000..f1949e7aa --- /dev/null +++ b/src/video_core/renderer_vulkan/present/present_push_constants.h @@ -0,0 +1,34 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "common/common_types.h" + +namespace Vulkan { + +struct ScreenRectVertex { + ScreenRectVertex() = default; + explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {} + + std::array<f32, 2> position; + std::array<f32, 2> tex_coord; +}; + +static inline std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) { + // clang-format off + return { 2.f / width, 0.f, 0.f, 0.f, + 0.f, 2.f / height, 0.f, 0.f, + 0.f, 0.f, 1.f, 0.f, + -1.f, -1.f, 0.f, 1.f}; + // clang-format on +} + +struct PresentPushConstants { + std::array<f32, 4 * 4> modelview_matrix; + std::array<ScreenRectVertex, 4> vertices; +}; + +static_assert(sizeof(PresentPushConstants) <= 128, "Push constants are too large"); + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/smaa.cpp b/src/video_core/renderer_vulkan/present/smaa.cpp new file mode 100644 index 000000000..39645fd1d --- /dev/null +++ b/src/video_core/renderer_vulkan/present/smaa.cpp @@ -0,0 +1,277 @@ +// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <list> + +#include "common/assert.h" +#include "common/polyfill_ranges.h" + +#include "video_core/renderer_vulkan/present/smaa.h" +#include "video_core/renderer_vulkan/present/util.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/smaa_area_tex.h" +#include "video_core/smaa_search_tex.h" +#include "video_core/vulkan_common/vulkan_device.h" + +#include "video_core/host_shaders/smaa_blending_weight_calculation_frag_spv.h" +#include "video_core/host_shaders/smaa_blending_weight_calculation_vert_spv.h" +#include "video_core/host_shaders/smaa_edge_detection_frag_spv.h" +#include "video_core/host_shaders/smaa_edge_detection_vert_spv.h" +#include "video_core/host_shaders/smaa_neighborhood_blending_frag_spv.h" +#include "video_core/host_shaders/smaa_neighborhood_blending_vert_spv.h" + +namespace Vulkan { + +SMAA::SMAA(const Device& device, MemoryAllocator& allocator, size_t image_count, VkExtent2D extent) + : m_device(device), m_allocator(allocator), m_extent(extent), + m_image_count(static_cast<u32>(image_count)) { + CreateImages(); + CreateRenderPasses(); + CreateSampler(); + CreateShaders(); + CreateDescriptorPool(); + CreateDescriptorSetLayouts(); + CreateDescriptorSets(); + CreatePipelineLayouts(); + CreatePipelines(); +} + +SMAA::~SMAA() = default; + +void SMAA::CreateImages() { + static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; + static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; + + m_static_images[Area] = CreateWrappedImage(m_allocator, area_extent, VK_FORMAT_R8G8_UNORM); + m_static_images[Search] = CreateWrappedImage(m_allocator, search_extent, VK_FORMAT_R8_UNORM); + + m_static_image_views[Area] = + CreateWrappedImageView(m_device, m_static_images[Area], VK_FORMAT_R8G8_UNORM); + m_static_image_views[Search] = + CreateWrappedImageView(m_device, m_static_images[Search], VK_FORMAT_R8_UNORM); + + for (u32 i = 0; i < m_image_count; i++) { + Images& images = m_dynamic_images.emplace_back(); + + images.images[Blend] = + CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); + images.images[Edges] = CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT); + images.images[Output] = + CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); + + images.image_views[Blend] = + CreateWrappedImageView(m_device, images.images[Blend], VK_FORMAT_R16G16B16A16_SFLOAT); + images.image_views[Edges] = + CreateWrappedImageView(m_device, images.images[Edges], VK_FORMAT_R16G16_SFLOAT); + images.image_views[Output] = + CreateWrappedImageView(m_device, images.images[Output], VK_FORMAT_R16G16B16A16_SFLOAT); + } +} + +void SMAA::CreateRenderPasses() { + m_renderpasses[EdgeDetection] = CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16_SFLOAT); + m_renderpasses[BlendingWeightCalculation] = + CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16B16A16_SFLOAT); + m_renderpasses[NeighborhoodBlending] = + CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16B16A16_SFLOAT); + + for (auto& images : m_dynamic_images) { + images.framebuffers[EdgeDetection] = CreateWrappedFramebuffer( + m_device, m_renderpasses[EdgeDetection], images.image_views[Edges], m_extent); + + images.framebuffers[BlendingWeightCalculation] = + CreateWrappedFramebuffer(m_device, m_renderpasses[BlendingWeightCalculation], + images.image_views[Blend], m_extent); + + images.framebuffers[NeighborhoodBlending] = CreateWrappedFramebuffer( + m_device, m_renderpasses[NeighborhoodBlending], images.image_views[Output], m_extent); + } +} + +void SMAA::CreateSampler() { + m_sampler = CreateWrappedSampler(m_device); +} + +void SMAA::CreateShaders() { + // These match the order of the SMAAStage enum + static constexpr std::array vert_shader_sources{ + ARRAY_TO_SPAN(SMAA_EDGE_DETECTION_VERT_SPV), + ARRAY_TO_SPAN(SMAA_BLENDING_WEIGHT_CALCULATION_VERT_SPV), + ARRAY_TO_SPAN(SMAA_NEIGHBORHOOD_BLENDING_VERT_SPV), + }; + static constexpr std::array frag_shader_sources{ + ARRAY_TO_SPAN(SMAA_EDGE_DETECTION_FRAG_SPV), + ARRAY_TO_SPAN(SMAA_BLENDING_WEIGHT_CALCULATION_FRAG_SPV), + ARRAY_TO_SPAN(SMAA_NEIGHBORHOOD_BLENDING_FRAG_SPV), + }; + + for (size_t i = 0; i < MaxSMAAStage; i++) { + m_vertex_shaders[i] = CreateWrappedShaderModule(m_device, vert_shader_sources[i]); + m_fragment_shaders[i] = CreateWrappedShaderModule(m_device, frag_shader_sources[i]); + } +} + +void SMAA::CreateDescriptorPool() { + // Edge detection: 1 descriptor + // Blending weight calculation: 3 descriptors + // Neighborhood blending: 2 descriptors + + // 6 descriptors, 3 descriptor sets per image + m_descriptor_pool = CreateWrappedDescriptorPool(m_device, 6 * m_image_count, 3 * m_image_count); +} + +void SMAA::CreateDescriptorSetLayouts() { + m_descriptor_set_layouts[EdgeDetection] = + CreateWrappedDescriptorSetLayout(m_device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER}); + m_descriptor_set_layouts[BlendingWeightCalculation] = + CreateWrappedDescriptorSetLayout(m_device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER}); + m_descriptor_set_layouts[NeighborhoodBlending] = + CreateWrappedDescriptorSetLayout(m_device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER}); +} + +void SMAA::CreateDescriptorSets() { + std::vector<VkDescriptorSetLayout> layouts(m_descriptor_set_layouts.size()); + std::ranges::transform(m_descriptor_set_layouts, layouts.begin(), + [](auto& layout) { return *layout; }); + + for (auto& images : m_dynamic_images) { + images.descriptor_sets = CreateWrappedDescriptorSets(m_descriptor_pool, layouts); + } +} + +void SMAA::CreatePipelineLayouts() { + for (size_t i = 0; i < MaxSMAAStage; i++) { + m_pipeline_layouts[i] = CreateWrappedPipelineLayout(m_device, m_descriptor_set_layouts[i]); + } +} + +void SMAA::CreatePipelines() { + for (size_t i = 0; i < MaxSMAAStage; i++) { + m_pipelines[i] = + CreateWrappedPipeline(m_device, m_renderpasses[i], m_pipeline_layouts[i], + std::tie(m_vertex_shaders[i], m_fragment_shaders[i])); + } +} + +void SMAA::UpdateDescriptorSets(VkImageView image_view, size_t image_index) { + Images& images = m_dynamic_images[image_index]; + std::vector<VkDescriptorImageInfo> image_infos; + std::vector<VkWriteDescriptorSet> updates; + image_infos.reserve(6); + + updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, image_view, + images.descriptor_sets[EdgeDetection], 0)); + + updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, *images.image_views[Edges], + images.descriptor_sets[BlendingWeightCalculation], + 0)); + updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, *m_static_image_views[Area], + images.descriptor_sets[BlendingWeightCalculation], + 1)); + updates.push_back( + CreateWriteDescriptorSet(image_infos, *m_sampler, *m_static_image_views[Search], + images.descriptor_sets[BlendingWeightCalculation], 2)); + + updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, image_view, + images.descriptor_sets[NeighborhoodBlending], 0)); + updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, *images.image_views[Blend], + images.descriptor_sets[NeighborhoodBlending], 1)); + + m_device.GetLogical().UpdateDescriptorSets(updates, {}); +} + +void SMAA::UploadImages(Scheduler& scheduler) { + if (m_images_ready) { + return; + } + + static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; + static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; + + UploadImage(m_device, m_allocator, scheduler, m_static_images[Area], area_extent, + VK_FORMAT_R8G8_UNORM, ARRAY_TO_SPAN(areaTexBytes)); + UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent, + VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes)); + + scheduler.Record([&](vk::CommandBuffer cmdbuf) { + for (auto& images : m_dynamic_images) { + for (size_t i = 0; i < MaxDynamicImage; i++) { + ClearColorImage(cmdbuf, *images.images[i]); + } + } + }); + scheduler.Finish(); + + m_images_ready = true; +} + +void SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage* inout_image, + VkImageView* inout_image_view) { + Images& images = m_dynamic_images[image_index]; + + VkImage input_image = *inout_image; + VkImage output_image = *images.images[Output]; + VkImage edges_image = *images.images[Edges]; + VkImage blend_image = *images.images[Blend]; + + VkDescriptorSet edge_detection_descriptor_set = images.descriptor_sets[EdgeDetection]; + VkDescriptorSet blending_weight_calculation_descriptor_set = + images.descriptor_sets[BlendingWeightCalculation]; + VkDescriptorSet neighborhood_blending_descriptor_set = + images.descriptor_sets[NeighborhoodBlending]; + + VkFramebuffer edge_detection_framebuffer = *images.framebuffers[EdgeDetection]; + VkFramebuffer blending_weight_calculation_framebuffer = + *images.framebuffers[BlendingWeightCalculation]; + VkFramebuffer neighborhood_blending_framebuffer = *images.framebuffers[NeighborhoodBlending]; + + UploadImages(scheduler); + UpdateDescriptorSets(*inout_image_view, image_index); + + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([=, this](vk::CommandBuffer cmdbuf) { + TransitionImageLayout(cmdbuf, input_image, VK_IMAGE_LAYOUT_GENERAL); + TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL); + BeginRenderPass(cmdbuf, *m_renderpasses[EdgeDetection], edge_detection_framebuffer, + m_extent); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipelines[EdgeDetection]); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, + *m_pipeline_layouts[EdgeDetection], 0, + edge_detection_descriptor_set, {}); + cmdbuf.Draw(3, 1, 0, 0); + cmdbuf.EndRenderPass(); + + TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL); + TransitionImageLayout(cmdbuf, blend_image, VK_IMAGE_LAYOUT_GENERAL); + BeginRenderPass(cmdbuf, *m_renderpasses[BlendingWeightCalculation], + blending_weight_calculation_framebuffer, m_extent); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, + *m_pipelines[BlendingWeightCalculation]); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, + *m_pipeline_layouts[BlendingWeightCalculation], 0, + blending_weight_calculation_descriptor_set, {}); + cmdbuf.Draw(3, 1, 0, 0); + cmdbuf.EndRenderPass(); + + TransitionImageLayout(cmdbuf, blend_image, VK_IMAGE_LAYOUT_GENERAL); + TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL); + BeginRenderPass(cmdbuf, *m_renderpasses[NeighborhoodBlending], + neighborhood_blending_framebuffer, m_extent); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipelines[NeighborhoodBlending]); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, + *m_pipeline_layouts[NeighborhoodBlending], 0, + neighborhood_blending_descriptor_set, {}); + cmdbuf.Draw(3, 1, 0, 0); + cmdbuf.EndRenderPass(); + TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL); + }); + + *inout_image = *images.images[Output]; + *inout_image_view = *images.image_views[Output]; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_smaa.h b/src/video_core/renderer_vulkan/present/smaa.h index 0e214258a..fdf6def07 100644 --- a/src/video_core/renderer_vulkan/vk_smaa.h +++ b/src/video_core/renderer_vulkan/present/smaa.h @@ -4,6 +4,7 @@ #pragma once #include <array> +#include "video_core/renderer_vulkan/present/anti_alias_pass.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -13,12 +14,14 @@ class Device; class Scheduler; class StagingBufferPool; -class SMAA { +class SMAA final : public AntiAliasPass { public: explicit SMAA(const Device& device, MemoryAllocator& allocator, size_t image_count, VkExtent2D extent); - VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImage source_image, - VkImageView source_image_view); + ~SMAA() override; + + void Draw(Scheduler& scheduler, size_t image_index, VkImage* inout_image, + VkImageView* inout_image_view) override; private: enum SMAAStage { diff --git a/src/video_core/renderer_vulkan/vk_smaa.cpp b/src/video_core/renderer_vulkan/present/util.cpp index 70644ea82..6ee16595d 100644 --- a/src/video_core/renderer_vulkan/vk_smaa.cpp +++ b/src/video_core/renderer_vulkan/present/util.cpp @@ -1,29 +1,25 @@ -// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include <list> - #include "common/assert.h" #include "common/polyfill_ranges.h" - -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_shader_util.h" -#include "video_core/renderer_vulkan/vk_smaa.h" -#include "video_core/smaa_area_tex.h" -#include "video_core/smaa_search_tex.h" -#include "video_core/vulkan_common/vulkan_device.h" - -#include "video_core/host_shaders/smaa_blending_weight_calculation_frag_spv.h" -#include "video_core/host_shaders/smaa_blending_weight_calculation_vert_spv.h" -#include "video_core/host_shaders/smaa_edge_detection_frag_spv.h" -#include "video_core/host_shaders/smaa_edge_detection_vert_spv.h" -#include "video_core/host_shaders/smaa_neighborhood_blending_frag_spv.h" -#include "video_core/host_shaders/smaa_neighborhood_blending_vert_spv.h" +#include "video_core/renderer_vulkan/present/util.h" namespace Vulkan { -namespace { -#define ARRAY_TO_SPAN(a) std::span(a, (sizeof(a) / sizeof(a[0]))) +vk::Buffer CreateWrappedBuffer(MemoryAllocator& allocator, VkDeviceSize size, MemoryUsage usage) { + const VkBufferCreateInfo dst_buffer_info{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = size, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }; + return allocator.CreateBuffer(dst_buffer_info, usage); +} vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions, VkFormat format) { const VkImageCreateInfo image_ci{ @@ -48,7 +44,7 @@ vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions, } void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout, - VkImageLayout source_layout = VK_IMAGE_LAYOUT_GENERAL) { + VkImageLayout source_layout) { constexpr VkFlags flags{VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT}; const VkImageMemoryBarrier barrier{ @@ -75,7 +71,7 @@ void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayo void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& scheduler, vk::Image& image, VkExtent2D dimensions, VkFormat format, - std::span<const u8> initial_contents = {}) { + std::span<const u8> initial_contents) { const VkBufferCreateInfo upload_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, @@ -114,6 +110,70 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc scheduler.Finish(); } +void DownloadColorImage(vk::CommandBuffer& cmdbuf, VkImage image, VkBuffer buffer, + VkExtent3D extent) { + const VkImageMemoryBarrier read_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + const VkImageMemoryBarrier image_write_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = 0, + .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }, + }; + static constexpr VkMemoryBarrier memory_write_barrier{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + }; + const VkBufferImageCopy copy{ + .bufferOffset = 0, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1, + }, + .imageOffset{.x = 0, .y = 0, .z = 0}, + .imageExtent{extent}, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + read_barrier); + cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffer, copy); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, + memory_write_barrier, nullptr, image_write_barrier); +} + vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkFormat format) { return device.GetLogical().CreateImageView(VkImageViewCreateInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, @@ -131,16 +191,18 @@ vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkF }); } -vk::RenderPass CreateWrappedRenderPass(const Device& device, VkFormat format) { +vk::RenderPass CreateWrappedRenderPass(const Device& device, VkFormat format, + VkImageLayout initial_layout) { const VkAttachmentDescription attachment{ .flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT, .format = format, .samples = VK_SAMPLE_COUNT_1_BIT, - .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .loadOp = initial_layout == VK_IMAGE_LAYOUT_UNDEFINED ? VK_ATTACHMENT_LOAD_OP_DONT_CARE + : VK_ATTACHMENT_LOAD_OP_LOAD, .storeOp = VK_ATTACHMENT_STORE_OP_STORE, .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, - .initialLayout = VK_IMAGE_LAYOUT_GENERAL, + .initialLayout = initial_layout, .finalLayout = VK_IMAGE_LAYOUT_GENERAL, }; @@ -200,13 +262,13 @@ vk::Framebuffer CreateWrappedFramebuffer(const Device& device, vk::RenderPass& r }); } -vk::Sampler CreateWrappedSampler(const Device& device) { +vk::Sampler CreateWrappedSampler(const Device& device, VkFilter filter) { return device.GetLogical().CreateSampler(VkSamplerCreateInfo{ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, .pNext = nullptr, .flags = 0, - .magFilter = VK_FILTER_LINEAR, - .minFilter = VK_FILTER_LINEAR, + .magFilter = filter, + .minFilter = filter, .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR, .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, @@ -233,30 +295,34 @@ vk::ShaderModule CreateWrappedShaderModule(const Device& device, std::span<const }); } -vk::DescriptorPool CreateWrappedDescriptorPool(const Device& device, u32 max_descriptors, - u32 max_sets) { - const VkDescriptorPoolSize pool_size{ - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = static_cast<u32>(max_descriptors), - }; +vk::DescriptorPool CreateWrappedDescriptorPool(const Device& device, size_t max_descriptors, + size_t max_sets, + std::initializer_list<VkDescriptorType> types) { + std::vector<VkDescriptorPoolSize> pool_sizes(types.size()); + for (u32 i = 0; i < types.size(); i++) { + pool_sizes[i] = VkDescriptorPoolSize{ + .type = std::data(types)[i], + .descriptorCount = static_cast<u32>(max_descriptors), + }; + } return device.GetLogical().CreateDescriptorPool(VkDescriptorPoolCreateInfo{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, .pNext = nullptr, .flags = 0, - .maxSets = max_sets, - .poolSizeCount = 1, - .pPoolSizes = &pool_size, + .maxSets = static_cast<u32>(max_sets), + .poolSizeCount = static_cast<u32>(pool_sizes.size()), + .pPoolSizes = pool_sizes.data(), }); } -vk::DescriptorSetLayout CreateWrappedDescriptorSetLayout(const Device& device, - u32 max_sampler_bindings) { - std::vector<VkDescriptorSetLayoutBinding> bindings(max_sampler_bindings); - for (u32 i = 0; i < max_sampler_bindings; i++) { +vk::DescriptorSetLayout CreateWrappedDescriptorSetLayout( + const Device& device, std::initializer_list<VkDescriptorType> types) { + std::vector<VkDescriptorSetLayoutBinding> bindings(types.size()); + for (size_t i = 0; i < types.size(); i++) { bindings[i] = { - .binding = i, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .binding = static_cast<u32>(i), + .descriptorType = std::data(types)[i], .descriptorCount = 1, .stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, .pImmutableSamplers = nullptr, @@ -298,7 +364,8 @@ vk::PipelineLayout CreateWrappedPipelineLayout(const Device& device, vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderpass, vk::PipelineLayout& layout, - std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders) { + std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders, + bool enable_blending) { const std::array<VkPipelineShaderStageCreateInfo, 2> shader_stages{{ { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, @@ -376,7 +443,7 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp .alphaToOneEnable = VK_FALSE, }; - constexpr VkPipelineColorBlendAttachmentState color_blend_attachment{ + constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_disabled{ .blendEnable = VK_FALSE, .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, @@ -388,6 +455,18 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, }; + constexpr VkPipelineColorBlendAttachmentState color_blend_attachment_enabled{ + .blendEnable = VK_TRUE, + .srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA, + .dstColorBlendFactor = VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .alphaBlendOp = VK_BLEND_OP_ADD, + .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | + VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, + }; + const VkPipelineColorBlendStateCreateInfo color_blend_ci{ .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, .pNext = nullptr, @@ -395,7 +474,8 @@ vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderp .logicOpEnable = VK_FALSE, .logicOp = VK_LOGIC_OP_COPY, .attachmentCount = 1, - .pAttachments = &color_blend_attachment, + .pAttachments = + enable_blending ? &color_blend_attachment_enabled : &color_blend_attachment_disabled, .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, }; @@ -459,6 +539,56 @@ VkWriteDescriptorSet CreateWriteDescriptorSet(std::vector<VkDescriptorImageInfo> }; } +vk::Sampler CreateBilinearSampler(const Device& device) { + const VkSamplerCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .magFilter = VK_FILTER_LINEAR, + .minFilter = VK_FILTER_LINEAR, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .mipLodBias = 0.0f, + .anisotropyEnable = VK_FALSE, + .maxAnisotropy = 0.0f, + .compareEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_NEVER, + .minLod = 0.0f, + .maxLod = 0.0f, + .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK, + .unnormalizedCoordinates = VK_FALSE, + }; + + return device.GetLogical().CreateSampler(ci); +} + +vk::Sampler CreateNearestNeighborSampler(const Device& device) { + const VkSamplerCreateInfo ci_nn{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .magFilter = VK_FILTER_NEAREST, + .minFilter = VK_FILTER_NEAREST, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + .mipLodBias = 0.0f, + .anisotropyEnable = VK_FALSE, + .maxAnisotropy = 0.0f, + .compareEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_NEVER, + .minLod = 0.0f, + .maxLod = 0.0f, + .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK, + .unnormalizedCoordinates = VK_FALSE, + }; + + return device.GetLogical().CreateSampler(ci_nn); +} + void ClearColorImage(vk::CommandBuffer& cmdbuf, VkImage image) { static constexpr std::array<VkImageSubresourceRange, 1> subresources{{{ .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, @@ -471,12 +601,12 @@ void ClearColorImage(vk::CommandBuffer& cmdbuf, VkImage image) { cmdbuf.ClearColorImage(image, VK_IMAGE_LAYOUT_GENERAL, {}, subresources); } -void BeginRenderPass(vk::CommandBuffer& cmdbuf, vk::RenderPass& render_pass, - VkFramebuffer framebuffer, VkExtent2D extent) { +void BeginRenderPass(vk::CommandBuffer& cmdbuf, VkRenderPass render_pass, VkFramebuffer framebuffer, + VkExtent2D extent) { const VkRenderPassBeginInfo renderpass_bi{ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, .pNext = nullptr, - .renderPass = *render_pass, + .renderPass = render_pass, .framebuffer = framebuffer, .renderArea{ .offset{}, @@ -503,248 +633,4 @@ void BeginRenderPass(vk::CommandBuffer& cmdbuf, vk::RenderPass& render_pass, cmdbuf.SetScissor(0, scissor); } -} // Anonymous namespace - -SMAA::SMAA(const Device& device, MemoryAllocator& allocator, size_t image_count, VkExtent2D extent) - : m_device(device), m_allocator(allocator), m_extent(extent), - m_image_count(static_cast<u32>(image_count)) { - CreateImages(); - CreateRenderPasses(); - CreateSampler(); - CreateShaders(); - CreateDescriptorPool(); - CreateDescriptorSetLayouts(); - CreateDescriptorSets(); - CreatePipelineLayouts(); - CreatePipelines(); -} - -void SMAA::CreateImages() { - static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; - static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; - - m_static_images[Area] = CreateWrappedImage(m_allocator, area_extent, VK_FORMAT_R8G8_UNORM); - m_static_images[Search] = CreateWrappedImage(m_allocator, search_extent, VK_FORMAT_R8_UNORM); - - m_static_image_views[Area] = - CreateWrappedImageView(m_device, m_static_images[Area], VK_FORMAT_R8G8_UNORM); - m_static_image_views[Search] = - CreateWrappedImageView(m_device, m_static_images[Search], VK_FORMAT_R8_UNORM); - - for (u32 i = 0; i < m_image_count; i++) { - Images& images = m_dynamic_images.emplace_back(); - - images.images[Blend] = - CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); - images.images[Edges] = CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT); - images.images[Output] = - CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT); - - images.image_views[Blend] = - CreateWrappedImageView(m_device, images.images[Blend], VK_FORMAT_R16G16B16A16_SFLOAT); - images.image_views[Edges] = - CreateWrappedImageView(m_device, images.images[Edges], VK_FORMAT_R16G16_SFLOAT); - images.image_views[Output] = - CreateWrappedImageView(m_device, images.images[Output], VK_FORMAT_R16G16B16A16_SFLOAT); - } -} - -void SMAA::CreateRenderPasses() { - m_renderpasses[EdgeDetection] = CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16_SFLOAT); - m_renderpasses[BlendingWeightCalculation] = - CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16B16A16_SFLOAT); - m_renderpasses[NeighborhoodBlending] = - CreateWrappedRenderPass(m_device, VK_FORMAT_R16G16B16A16_SFLOAT); - - for (auto& images : m_dynamic_images) { - images.framebuffers[EdgeDetection] = CreateWrappedFramebuffer( - m_device, m_renderpasses[EdgeDetection], images.image_views[Edges], m_extent); - - images.framebuffers[BlendingWeightCalculation] = - CreateWrappedFramebuffer(m_device, m_renderpasses[BlendingWeightCalculation], - images.image_views[Blend], m_extent); - - images.framebuffers[NeighborhoodBlending] = CreateWrappedFramebuffer( - m_device, m_renderpasses[NeighborhoodBlending], images.image_views[Output], m_extent); - } -} - -void SMAA::CreateSampler() { - m_sampler = CreateWrappedSampler(m_device); -} - -void SMAA::CreateShaders() { - // These match the order of the SMAAStage enum - static constexpr std::array vert_shader_sources{ - ARRAY_TO_SPAN(SMAA_EDGE_DETECTION_VERT_SPV), - ARRAY_TO_SPAN(SMAA_BLENDING_WEIGHT_CALCULATION_VERT_SPV), - ARRAY_TO_SPAN(SMAA_NEIGHBORHOOD_BLENDING_VERT_SPV), - }; - static constexpr std::array frag_shader_sources{ - ARRAY_TO_SPAN(SMAA_EDGE_DETECTION_FRAG_SPV), - ARRAY_TO_SPAN(SMAA_BLENDING_WEIGHT_CALCULATION_FRAG_SPV), - ARRAY_TO_SPAN(SMAA_NEIGHBORHOOD_BLENDING_FRAG_SPV), - }; - - for (size_t i = 0; i < MaxSMAAStage; i++) { - m_vertex_shaders[i] = CreateWrappedShaderModule(m_device, vert_shader_sources[i]); - m_fragment_shaders[i] = CreateWrappedShaderModule(m_device, frag_shader_sources[i]); - } -} - -void SMAA::CreateDescriptorPool() { - // Edge detection: 1 descriptor - // Blending weight calculation: 3 descriptors - // Neighborhood blending: 2 descriptors - - // 6 descriptors, 3 descriptor sets per image - m_descriptor_pool = CreateWrappedDescriptorPool(m_device, 6 * m_image_count, 3 * m_image_count); -} - -void SMAA::CreateDescriptorSetLayouts() { - m_descriptor_set_layouts[EdgeDetection] = CreateWrappedDescriptorSetLayout(m_device, 1); - m_descriptor_set_layouts[BlendingWeightCalculation] = - CreateWrappedDescriptorSetLayout(m_device, 3); - m_descriptor_set_layouts[NeighborhoodBlending] = CreateWrappedDescriptorSetLayout(m_device, 2); -} - -void SMAA::CreateDescriptorSets() { - std::vector<VkDescriptorSetLayout> layouts(m_descriptor_set_layouts.size()); - std::ranges::transform(m_descriptor_set_layouts, layouts.begin(), - [](auto& layout) { return *layout; }); - - for (auto& images : m_dynamic_images) { - images.descriptor_sets = CreateWrappedDescriptorSets(m_descriptor_pool, layouts); - } -} - -void SMAA::CreatePipelineLayouts() { - for (size_t i = 0; i < MaxSMAAStage; i++) { - m_pipeline_layouts[i] = CreateWrappedPipelineLayout(m_device, m_descriptor_set_layouts[i]); - } -} - -void SMAA::CreatePipelines() { - for (size_t i = 0; i < MaxSMAAStage; i++) { - m_pipelines[i] = - CreateWrappedPipeline(m_device, m_renderpasses[i], m_pipeline_layouts[i], - std::tie(m_vertex_shaders[i], m_fragment_shaders[i])); - } -} - -void SMAA::UpdateDescriptorSets(VkImageView image_view, size_t image_index) { - Images& images = m_dynamic_images[image_index]; - std::vector<VkDescriptorImageInfo> image_infos; - std::vector<VkWriteDescriptorSet> updates; - image_infos.reserve(6); - - updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, image_view, - images.descriptor_sets[EdgeDetection], 0)); - - updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, *images.image_views[Edges], - images.descriptor_sets[BlendingWeightCalculation], - 0)); - updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, *m_static_image_views[Area], - images.descriptor_sets[BlendingWeightCalculation], - 1)); - updates.push_back( - CreateWriteDescriptorSet(image_infos, *m_sampler, *m_static_image_views[Search], - images.descriptor_sets[BlendingWeightCalculation], 2)); - - updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, image_view, - images.descriptor_sets[NeighborhoodBlending], 0)); - updates.push_back(CreateWriteDescriptorSet(image_infos, *m_sampler, *images.image_views[Blend], - images.descriptor_sets[NeighborhoodBlending], 1)); - - m_device.GetLogical().UpdateDescriptorSets(updates, {}); -} - -void SMAA::UploadImages(Scheduler& scheduler) { - if (m_images_ready) { - return; - } - - static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT}; - static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT}; - - UploadImage(m_device, m_allocator, scheduler, m_static_images[Area], area_extent, - VK_FORMAT_R8G8_UNORM, ARRAY_TO_SPAN(areaTexBytes)); - UploadImage(m_device, m_allocator, scheduler, m_static_images[Search], search_extent, - VK_FORMAT_R8_UNORM, ARRAY_TO_SPAN(searchTexBytes)); - - scheduler.Record([&](vk::CommandBuffer cmdbuf) { - for (auto& images : m_dynamic_images) { - for (size_t i = 0; i < MaxDynamicImage; i++) { - ClearColorImage(cmdbuf, *images.images[i]); - } - } - }); - scheduler.Finish(); - - m_images_ready = true; -} - -VkImageView SMAA::Draw(Scheduler& scheduler, size_t image_index, VkImage source_image, - VkImageView source_image_view) { - Images& images = m_dynamic_images[image_index]; - - VkImage output_image = *images.images[Output]; - VkImage edges_image = *images.images[Edges]; - VkImage blend_image = *images.images[Blend]; - - VkDescriptorSet edge_detection_descriptor_set = images.descriptor_sets[EdgeDetection]; - VkDescriptorSet blending_weight_calculation_descriptor_set = - images.descriptor_sets[BlendingWeightCalculation]; - VkDescriptorSet neighborhood_blending_descriptor_set = - images.descriptor_sets[NeighborhoodBlending]; - - VkFramebuffer edge_detection_framebuffer = *images.framebuffers[EdgeDetection]; - VkFramebuffer blending_weight_calculation_framebuffer = - *images.framebuffers[BlendingWeightCalculation]; - VkFramebuffer neighborhood_blending_framebuffer = *images.framebuffers[NeighborhoodBlending]; - - UploadImages(scheduler); - UpdateDescriptorSets(source_image_view, image_index); - - scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([=, this](vk::CommandBuffer cmdbuf) { - TransitionImageLayout(cmdbuf, source_image, VK_IMAGE_LAYOUT_GENERAL); - TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL); - BeginRenderPass(cmdbuf, m_renderpasses[EdgeDetection], edge_detection_framebuffer, - m_extent); - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipelines[EdgeDetection]); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, - *m_pipeline_layouts[EdgeDetection], 0, - edge_detection_descriptor_set, {}); - cmdbuf.Draw(3, 1, 0, 0); - cmdbuf.EndRenderPass(); - - TransitionImageLayout(cmdbuf, edges_image, VK_IMAGE_LAYOUT_GENERAL); - TransitionImageLayout(cmdbuf, blend_image, VK_IMAGE_LAYOUT_GENERAL); - BeginRenderPass(cmdbuf, m_renderpasses[BlendingWeightCalculation], - blending_weight_calculation_framebuffer, m_extent); - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, - *m_pipelines[BlendingWeightCalculation]); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, - *m_pipeline_layouts[BlendingWeightCalculation], 0, - blending_weight_calculation_descriptor_set, {}); - cmdbuf.Draw(3, 1, 0, 0); - cmdbuf.EndRenderPass(); - - TransitionImageLayout(cmdbuf, blend_image, VK_IMAGE_LAYOUT_GENERAL); - TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL); - BeginRenderPass(cmdbuf, m_renderpasses[NeighborhoodBlending], - neighborhood_blending_framebuffer, m_extent); - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipelines[NeighborhoodBlending]); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, - *m_pipeline_layouts[NeighborhoodBlending], 0, - neighborhood_blending_descriptor_set, {}); - cmdbuf.Draw(3, 1, 0, 0); - cmdbuf.EndRenderPass(); - TransitionImageLayout(cmdbuf, output_image, VK_IMAGE_LAYOUT_GENERAL); - }); - - return *images.image_views[Output]; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/util.h b/src/video_core/renderer_vulkan/present/util.h new file mode 100644 index 000000000..1104aaa15 --- /dev/null +++ b/src/video_core/renderer_vulkan/present/util.h @@ -0,0 +1,56 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Vulkan { + +#define ARRAY_TO_SPAN(a) std::span(a, (sizeof(a) / sizeof(a[0]))) + +vk::Buffer CreateWrappedBuffer(MemoryAllocator& allocator, VkDeviceSize size, MemoryUsage usage); + +vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions, VkFormat format); +void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout, + VkImageLayout source_layout = VK_IMAGE_LAYOUT_GENERAL); +void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& scheduler, + vk::Image& image, VkExtent2D dimensions, VkFormat format, + std::span<const u8> initial_contents = {}); +void DownloadColorImage(vk::CommandBuffer& cmdbuf, VkImage image, VkBuffer buffer, + VkExtent3D extent); +void ClearColorImage(vk::CommandBuffer& cmdbuf, VkImage image); + +vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkFormat format); +vk::RenderPass CreateWrappedRenderPass(const Device& device, VkFormat format, + VkImageLayout initial_layout = VK_IMAGE_LAYOUT_GENERAL); +vk::Framebuffer CreateWrappedFramebuffer(const Device& device, vk::RenderPass& render_pass, + vk::ImageView& dest_image, VkExtent2D extent); +vk::Sampler CreateWrappedSampler(const Device& device, VkFilter filter = VK_FILTER_LINEAR); +vk::ShaderModule CreateWrappedShaderModule(const Device& device, std::span<const u32> code); +vk::DescriptorPool CreateWrappedDescriptorPool(const Device& device, size_t max_descriptors, + size_t max_sets, + std::initializer_list<VkDescriptorType> types = { + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER}); +vk::DescriptorSetLayout CreateWrappedDescriptorSetLayout( + const Device& device, std::initializer_list<VkDescriptorType> types); +vk::DescriptorSets CreateWrappedDescriptorSets(vk::DescriptorPool& pool, + vk::Span<VkDescriptorSetLayout> layouts); +vk::PipelineLayout CreateWrappedPipelineLayout(const Device& device, + vk::DescriptorSetLayout& layout); +vk::Pipeline CreateWrappedPipeline(const Device& device, vk::RenderPass& renderpass, + vk::PipelineLayout& layout, + std::tuple<vk::ShaderModule&, vk::ShaderModule&> shaders, + bool enable_blending = false); +VkWriteDescriptorSet CreateWriteDescriptorSet(std::vector<VkDescriptorImageInfo>& images, + VkSampler sampler, VkImageView view, + VkDescriptorSet set, u32 binding); +vk::Sampler CreateBilinearSampler(const Device& device); +vk::Sampler CreateNearestNeighborSampler(const Device& device); + +void BeginRenderPass(vk::CommandBuffer& cmdbuf, VkRenderPass render_pass, VkFramebuffer framebuffer, + VkExtent2D extent); + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp b/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp new file mode 100644 index 000000000..c5db0230d --- /dev/null +++ b/src/video_core/renderer_vulkan/present/window_adapt_pass.cpp @@ -0,0 +1,137 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "core/frontend/framebuffer_layout.h" +#include "video_core/framebuffer_config.h" +#include "video_core/host_shaders/vulkan_present_vert_spv.h" +#include "video_core/renderer_vulkan/present/layer.h" +#include "video_core/renderer_vulkan/present/present_push_constants.h" +#include "video_core/renderer_vulkan/present/util.h" +#include "video_core/renderer_vulkan/present/window_adapt_pass.h" +#include "video_core/renderer_vulkan/vk_present_manager.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/vulkan_common/vulkan_device.h" +#include "video_core/vulkan_common/vulkan_memory_allocator.h" + +namespace Vulkan { + +WindowAdaptPass::WindowAdaptPass(const Device& device_, VkFormat frame_format, + vk::Sampler&& sampler_, vk::ShaderModule&& fragment_shader_) + : device(device_), sampler(std::move(sampler_)), fragment_shader(std::move(fragment_shader_)) { + CreateDescriptorSetLayout(); + CreatePipelineLayout(); + CreateVertexShader(); + CreateRenderPass(frame_format); + CreatePipeline(); +} + +WindowAdaptPass::~WindowAdaptPass() = default; + +void WindowAdaptPass::Draw(RasterizerVulkan& rasterizer, Scheduler& scheduler, size_t image_index, + std::list<Layer>& layers, + std::span<const Tegra::FramebufferConfig> configs, + const Layout::FramebufferLayout& layout, Frame* dst) { + + const VkFramebuffer host_framebuffer{*dst->framebuffer}; + const VkRenderPass renderpass{*render_pass}; + const VkPipeline graphics_pipeline{*pipeline}; + const VkPipelineLayout graphics_pipeline_layout{*pipeline_layout}; + const VkExtent2D render_area{ + .width = dst->width, + .height = dst->height, + }; + + const size_t layer_count = configs.size(); + std::vector<PresentPushConstants> push_constants(layer_count); + std::vector<VkDescriptorSet> descriptor_sets(layer_count); + + auto layer_it = layers.begin(); + for (size_t i = 0; i < layer_count; i++) { + layer_it->ConfigureDraw(&push_constants[i], &descriptor_sets[i], rasterizer, *sampler, + image_index, configs[i], layout); + layer_it++; + } + + scheduler.Record([=](vk::CommandBuffer cmdbuf) { + const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; + const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; + const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; + const VkClearAttachment clear_attachment{ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .colorAttachment = 0, + .clearValue = + { + .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}}, + }, + }; + const VkClearRect clear_rect{ + .rect = + { + .offset = {0, 0}, + .extent = render_area, + }, + .baseArrayLayer = 0, + .layerCount = 1, + }; + + BeginRenderPass(cmdbuf, renderpass, host_framebuffer, render_area); + cmdbuf.ClearAttachments({clear_attachment}, {clear_rect}); + + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline); + for (size_t i = 0; i < layer_count; i++) { + cmdbuf.PushConstants(graphics_pipeline_layout, VK_SHADER_STAGE_VERTEX_BIT, + push_constants[i]); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline_layout, 0, + descriptor_sets[i], {}); + cmdbuf.Draw(4, 1, 0, 0); + } + + cmdbuf.EndRenderPass(); + }); +} + +VkDescriptorSetLayout WindowAdaptPass::GetDescriptorSetLayout() { + return *descriptor_set_layout; +} + +VkRenderPass WindowAdaptPass::GetRenderPass() { + return *render_pass; +} + +void WindowAdaptPass::CreateDescriptorSetLayout() { + descriptor_set_layout = + CreateWrappedDescriptorSetLayout(device, {VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER}); +} + +void WindowAdaptPass::CreatePipelineLayout() { + const VkPushConstantRange range{ + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + .offset = 0, + .size = sizeof(PresentPushConstants), + }; + + pipeline_layout = device.GetLogical().CreatePipelineLayout(VkPipelineLayoutCreateInfo{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .setLayoutCount = 1, + .pSetLayouts = descriptor_set_layout.address(), + .pushConstantRangeCount = 1, + .pPushConstantRanges = &range, + }); +} + +void WindowAdaptPass::CreateVertexShader() { + vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV); +} + +void WindowAdaptPass::CreateRenderPass(VkFormat frame_format) { + render_pass = CreateWrappedRenderPass(device, frame_format, VK_IMAGE_LAYOUT_UNDEFINED); +} + +void WindowAdaptPass::CreatePipeline() { + pipeline = CreateWrappedPipeline(device, render_pass, pipeline_layout, + std::tie(vertex_shader, fragment_shader), false); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/window_adapt_pass.h b/src/video_core/renderer_vulkan/present/window_adapt_pass.h new file mode 100644 index 000000000..0e2edfc31 --- /dev/null +++ b/src/video_core/renderer_vulkan/present/window_adapt_pass.h @@ -0,0 +1,58 @@ +// SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include <list> + +#include "common/math_util.h" +#include "video_core/vulkan_common/vulkan_wrapper.h" + +namespace Layout { +struct FramebufferLayout; +} + +namespace Tegra { +struct FramebufferConfig; +} + +namespace Vulkan { + +class Device; +struct Frame; +class Layer; +class Scheduler; +class RasterizerVulkan; + +class WindowAdaptPass final { +public: + explicit WindowAdaptPass(const Device& device, VkFormat frame_format, vk::Sampler&& sampler, + vk::ShaderModule&& fragment_shader); + ~WindowAdaptPass(); + + void Draw(RasterizerVulkan& rasterizer, Scheduler& scheduler, size_t image_index, + std::list<Layer>& layers, std::span<const Tegra::FramebufferConfig> configs, + const Layout::FramebufferLayout& layout, Frame* dst); + + VkDescriptorSetLayout GetDescriptorSetLayout(); + VkRenderPass GetRenderPass(); + +private: + void CreateDescriptorSetLayout(); + void CreatePipelineLayout(); + void CreateVertexShader(); + void CreateRenderPass(VkFormat frame_format); + void CreatePipeline(); + +private: + const Device& device; + vk::DescriptorSetLayout descriptor_set_layout; + vk::PipelineLayout pipeline_layout; + vk::Sampler sampler; + vk::ShaderModule vertex_shader; + vk::ShaderModule fragment_shader; + vk::RenderPass render_pass; + vk::Pipeline pipeline; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 1631276c6..48a105327 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -20,12 +20,14 @@ #include "core/frontend/graphics_context.h" #include "core/telemetry_session.h" #include "video_core/gpu.h" +#include "video_core/renderer_vulkan/present/util.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" #include "video_core/renderer_vulkan/vk_rasterizer.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_state_tracker.h" #include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/textures/decoders.h" #include "video_core/vulkan_common/vulkan_debug_callback.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_instance.h" @@ -97,10 +99,10 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_, render_window.GetFramebufferLayout().height), present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain, surface), - blit_screen(device_memory, render_window, device, memory_allocator, swapchain, - present_manager, scheduler, screen_info), - rasterizer(render_window, gpu, device_memory, screen_info, device, memory_allocator, - state_tracker, scheduler) { + blit_swapchain(device_memory, device, memory_allocator, present_manager, scheduler), + blit_screenshot(device_memory, device, memory_allocator, present_manager, scheduler), + rasterizer(render_window, gpu, device_memory, device, memory_allocator, state_tracker, + scheduler) { if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) { turbo_mode.emplace(instance, dld); scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); }); @@ -116,25 +118,22 @@ RendererVulkan::~RendererVulkan() { void(device.GetLogical().WaitIdle()); } -void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - if (!framebuffer) { +void RendererVulkan::Composite(std::span<const Tegra::FramebufferConfig> framebuffers) { + if (framebuffers.empty()) { return; } + SCOPE_EXIT({ render_window.OnFrameDisplayed(); }); + if (!render_window.IsShown()) { return; } - // Update screen info if the framebuffer size has changed. - screen_info.width = framebuffer->width; - screen_info.height = framebuffer->height; - - const DAddr framebuffer_addr = framebuffer->address + framebuffer->offset; - const bool use_accelerated = - rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); - RenderScreenshot(*framebuffer, use_accelerated); + RenderScreenshot(framebuffers); Frame* frame = present_manager.GetRenderFrame(); - blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated); + blit_swapchain.DrawToFrame(rasterizer, frame, framebuffers, + render_window.GetFramebufferLayout(), swapchain.GetImageCount(), + swapchain.GetImageViewFormat()); scheduler.Flush(*frame->render_ready); present_manager.Present(frame); @@ -168,143 +167,37 @@ void RendererVulkan::Report() const { telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); } -void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, - bool use_accelerated) { +void Vulkan::RendererVulkan::RenderScreenshot( + std::span<const Tegra::FramebufferConfig> framebuffers) { if (!renderer_settings.screenshot_requested) { return; } + + constexpr VkFormat ScreenshotFormat{VK_FORMAT_B8G8R8A8_UNORM}; const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; - vk::Image staging_image = memory_allocator.CreateImage(VkImageCreateInfo{ - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .pNext = nullptr, - .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, - .imageType = VK_IMAGE_TYPE_2D, - .format = VK_FORMAT_B8G8R8A8_UNORM, - .extent = - { - .width = layout.width, - .height = layout.height, - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = VK_SAMPLE_COUNT_1_BIT, - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | - VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - }); - const vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .image = *staging_image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = VK_FORMAT_B8G8R8A8_UNORM, - .components{ - .r = VK_COMPONENT_SWIZZLE_IDENTITY, - .g = VK_COMPONENT_SWIZZLE_IDENTITY, - .b = VK_COMPONENT_SWIZZLE_IDENTITY, - .a = VK_COMPONENT_SWIZZLE_IDENTITY, - }, - .subresourceRange{ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }); - const VkExtent2D render_area{.width = layout.width, .height = layout.height}; - const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area); - blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated); + auto frame = [&]() { + Frame f{}; + f.image = CreateWrappedImage(memory_allocator, VkExtent2D{layout.width, layout.height}, + ScreenshotFormat); + f.image_view = CreateWrappedImageView(device, f.image, ScreenshotFormat); + f.framebuffer = blit_screenshot.CreateFramebuffer(layout, *f.image_view, ScreenshotFormat); + return f; + }(); - const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4); - const VkBufferCreateInfo dst_buffer_info{ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = buffer_size, - .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }; - const vk::Buffer dst_buffer = - memory_allocator.CreateBuffer(dst_buffer_info, MemoryUsage::Download); + blit_screenshot.DrawToFrame(rasterizer, &frame, framebuffers, layout, 1, + VK_FORMAT_B8G8R8A8_UNORM); + + const auto dst_buffer = CreateWrappedBuffer( + memory_allocator, static_cast<VkDeviceSize>(layout.width * layout.height * 4), + MemoryUsage::Download); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([&](vk::CommandBuffer cmdbuf) { - const VkImageMemoryBarrier read_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, - .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = *staging_image, - .subresourceRange{ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }; - const VkImageMemoryBarrier image_write_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, - .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = *staging_image, - .subresourceRange{ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }; - static constexpr VkMemoryBarrier memory_write_barrier{ - .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, - .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, - }; - const VkBufferImageCopy copy{ - .bufferOffset = 0, - .bufferRowLength = 0, - .bufferImageHeight = 0, - .imageSubresource{ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1, - }, - .imageOffset{.x = 0, .y = 0, .z = 0}, - .imageExtent{ - .width = layout.width, - .height = layout.height, - .depth = 1, - }, - }; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, read_barrier); - cmdbuf.CopyImageToBuffer(*staging_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *dst_buffer, - copy); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - 0, memory_write_barrier, nullptr, image_write_barrier); + DownloadColorImage(cmdbuf, *frame.image, *dst_buffer, + VkExtent3D{layout.width, layout.height, 1}); }); + // Ensure the copy is fully completed before saving the screenshot scheduler.Finish(); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index 11c52287a..c6d8a0f21 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -46,7 +46,7 @@ public: std::unique_ptr<Core::Frontend::GraphicsContext> context_); ~RendererVulkan() override; - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; + void Composite(std::span<const Tegra::FramebufferConfig> framebuffers) override; VideoCore::RasterizerInterface* ReadRasterizer() override { return &rasterizer; @@ -59,7 +59,7 @@ public: private: void Report() const; - void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated); + void RenderScreenshot(std::span<const Tegra::FramebufferConfig> framebuffers); Core::TelemetrySession& telemetry_session; Tegra::MaxwellDeviceMemoryManager& device_memory; @@ -72,15 +72,14 @@ private: vk::DebugUtilsMessenger debug_messenger; vk::SurfaceKHR surface; - ScreenInfo screen_info; - Device device; MemoryAllocator memory_allocator; StateTracker state_tracker; Scheduler scheduler; Swapchain swapchain; PresentManager present_manager; - BlitScreen blit_screen; + BlitScreen blit_swapchain; + BlitScreen blit_screenshot; RasterizerVulkan rasterizer; std::optional<TurboMode> turbo_mode; }; diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 610f27c84..2275fcc46 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -1,522 +1,143 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include <algorithm> -#include <array> -#include <cstring> -#include <memory> -#include <vector> - -#include "common/assert.h" -#include "common/common_types.h" -#include "common/math_util.h" -#include "common/polyfill_ranges.h" -#include "common/settings.h" -#include "core/core.h" -#include "core/frontend/emu_window.h" -#include "video_core/gpu.h" -#include "video_core/host1x/gpu_device_memory_manager.h" -#include "video_core/host_shaders/fxaa_frag_spv.h" -#include "video_core/host_shaders/fxaa_vert_spv.h" -#include "video_core/host_shaders/present_bicubic_frag_spv.h" -#include "video_core/host_shaders/present_gaussian_frag_spv.h" -#include "video_core/host_shaders/vulkan_present_frag_spv.h" -#include "video_core/host_shaders/vulkan_present_scaleforce_fp16_frag_spv.h" -#include "video_core/host_shaders/vulkan_present_scaleforce_fp32_frag_spv.h" -#include "video_core/host_shaders/vulkan_present_vert_spv.h" -#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/framebuffer_config.h" +#include "video_core/renderer_vulkan/present/filters.h" +#include "video_core/renderer_vulkan/present/layer.h" #include "video_core/renderer_vulkan/vk_blit_screen.h" -#include "video_core/renderer_vulkan/vk_fsr.h" +#include "video_core/renderer_vulkan/vk_present_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_shader_util.h" -#include "video_core/renderer_vulkan/vk_smaa.h" -#include "video_core/renderer_vulkan/vk_swapchain.h" -#include "video_core/surface.h" -#include "video_core/textures/decoders.h" -#include "video_core/vulkan_common/vulkan_device.h" -#include "video_core/vulkan_common/vulkan_memory_allocator.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -namespace { - -struct ScreenRectVertex { - ScreenRectVertex() = default; - explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {} - - std::array<f32, 2> position; - std::array<f32, 2> tex_coord; - - static VkVertexInputBindingDescription GetDescription() { - return { - .binding = 0, - .stride = sizeof(ScreenRectVertex), - .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, - }; - } - - static std::array<VkVertexInputAttributeDescription, 2> GetAttributes() { - return {{ - { - .location = 0, - .binding = 0, - .format = VK_FORMAT_R32G32_SFLOAT, - .offset = offsetof(ScreenRectVertex, position), - }, - { - .location = 1, - .binding = 0, - .format = VK_FORMAT_R32G32_SFLOAT, - .offset = offsetof(ScreenRectVertex, tex_coord), - }, - }}; - } -}; +BlitScreen::BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory_, const Device& device_, + MemoryAllocator& memory_allocator_, PresentManager& present_manager_, + Scheduler& scheduler_) + : device_memory{device_memory_}, device{device_}, memory_allocator{memory_allocator_}, + present_manager{present_manager_}, scheduler{scheduler_}, image_count{1}, + swapchain_view_format{VK_FORMAT_B8G8R8A8_UNORM} {} -std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) { - // clang-format off - return { 2.f / width, 0.f, 0.f, 0.f, - 0.f, 2.f / height, 0.f, 0.f, - 0.f, 0.f, 1.f, 0.f, - -1.f, -1.f, 0.f, 1.f}; - // clang-format on -} - -u32 GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) { - using namespace VideoCore::Surface; - return BytesPerBlock(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)); -} +BlitScreen::~BlitScreen() = default; -std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { - return static_cast<std::size_t>(framebuffer.stride) * - static_cast<std::size_t>(framebuffer.height) * GetBytesPerPixel(framebuffer); +void BlitScreen::WaitIdle() { + present_manager.WaitPresent(); + scheduler.Finish(); + device.GetLogical().WaitIdle(); } -VkFormat GetFormat(const Tegra::FramebufferConfig& framebuffer) { - switch (framebuffer.pixel_format) { - case Service::android::PixelFormat::Rgba8888: - case Service::android::PixelFormat::Rgbx8888: - return VK_FORMAT_A8B8G8R8_UNORM_PACK32; - case Service::android::PixelFormat::Rgb565: - return VK_FORMAT_R5G6B5_UNORM_PACK16; - case Service::android::PixelFormat::Bgra8888: - return VK_FORMAT_B8G8R8A8_UNORM; +void BlitScreen::SetWindowAdaptPass() { + layers.clear(); + scaling_filter = Settings::values.scaling_filter.GetValue(); + + switch (scaling_filter) { + case Settings::ScalingFilter::NearestNeighbor: + window_adapt = MakeNearestNeighbor(device, swapchain_view_format); + break; + case Settings::ScalingFilter::Bicubic: + window_adapt = MakeBicubic(device, swapchain_view_format); + break; + case Settings::ScalingFilter::Gaussian: + window_adapt = MakeGaussian(device, swapchain_view_format); + break; + case Settings::ScalingFilter::ScaleForce: + window_adapt = MakeScaleForce(device, swapchain_view_format); + break; + case Settings::ScalingFilter::Fsr: + case Settings::ScalingFilter::Bilinear: default: - UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", - static_cast<u32>(framebuffer.pixel_format)); - return VK_FORMAT_A8B8G8R8_UNORM_PACK32; + window_adapt = MakeBilinear(device, swapchain_view_format); + break; } } -} // Anonymous namespace - -struct BlitScreen::BufferData { - struct { - std::array<f32, 4 * 4> modelview_matrix; - } uniform; - - std::array<ScreenRectVertex, 4> vertices; - - // Unaligned image data goes here -}; - -BlitScreen::BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory_, - Core::Frontend::EmuWindow& render_window_, const Device& device_, - MemoryAllocator& memory_allocator_, Swapchain& swapchain_, - PresentManager& present_manager_, Scheduler& scheduler_, - const ScreenInfo& screen_info_) - : device_memory{device_memory_}, render_window{render_window_}, device{device_}, - memory_allocator{memory_allocator_}, swapchain{swapchain_}, present_manager{present_manager_}, - scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} { - resource_ticks.resize(image_count); - swapchain_view_format = swapchain.GetImageViewFormat(); - - CreateStaticResources(); - CreateDynamicResources(); -} - -BlitScreen::~BlitScreen() = default; - -static Common::Rectangle<f32> NormalizeCrop(const Tegra::FramebufferConfig& framebuffer, - const ScreenInfo& screen_info) { - f32 left, top, right, bottom; - - if (!framebuffer.crop_rect.IsEmpty()) { - // If crop rectangle is not empty, apply properties from rectangle. - left = static_cast<f32>(framebuffer.crop_rect.left); - top = static_cast<f32>(framebuffer.crop_rect.top); - right = static_cast<f32>(framebuffer.crop_rect.right); - bottom = static_cast<f32>(framebuffer.crop_rect.bottom); - } else { - // Otherwise, fall back to framebuffer dimensions. - left = 0; - top = 0; - right = static_cast<f32>(framebuffer.width); - bottom = static_cast<f32>(framebuffer.height); - } - - // Apply transformation flags. - auto framebuffer_transform_flags = framebuffer.transform_flags; +void BlitScreen::DrawToFrame(RasterizerVulkan& rasterizer, Frame* frame, + std::span<const Tegra::FramebufferConfig> framebuffers, + const Layout::FramebufferLayout& layout, + size_t current_swapchain_image_count, + VkFormat current_swapchain_view_format) { + bool resource_update_required = false; + bool presentation_recreate_required = false; - if (True(framebuffer_transform_flags & Service::android::BufferTransformFlags::FlipH)) { - // Switch left and right. - std::swap(left, right); - } - if (True(framebuffer_transform_flags & Service::android::BufferTransformFlags::FlipV)) { - // Switch top and bottom. - std::swap(top, bottom); + // Recreate dynamic resources if the adapting filter changed + if (!window_adapt || scaling_filter != Settings::values.scaling_filter.GetValue()) { + resource_update_required = true; } - framebuffer_transform_flags &= ~Service::android::BufferTransformFlags::FlipH; - framebuffer_transform_flags &= ~Service::android::BufferTransformFlags::FlipV; - if (True(framebuffer_transform_flags)) { - UNIMPLEMENTED_MSG("Unsupported framebuffer_transform_flags={}", - static_cast<u32>(framebuffer_transform_flags)); + // Recreate dynamic resources if the image count changed + const size_t old_swapchain_image_count = + std::exchange(image_count, current_swapchain_image_count); + if (old_swapchain_image_count != current_swapchain_image_count) { + resource_update_required = true; } - // Get the screen properties. - const f32 screen_width = static_cast<f32>(screen_info.width); - const f32 screen_height = static_cast<f32>(screen_info.height); - - // Normalize coordinate space. - left /= screen_width; - top /= screen_height; - right /= screen_width; - bottom /= screen_height; - - return Common::Rectangle<f32>(left, top, right, bottom); -} - -void BlitScreen::Recreate() { - present_manager.WaitPresent(); - scheduler.Finish(); - device.GetLogical().WaitIdle(); - CreateDynamicResources(); -} - -void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, - const VkFramebuffer& host_framebuffer, const Layout::FramebufferLayout layout, - VkExtent2D render_area, bool use_accelerated) { - RefreshResources(framebuffer); - - // Finish any pending renderpass - scheduler.RequestOutsideRenderPassOperationContext(); - - scheduler.Wait(resource_ticks[image_index]); - resource_ticks[image_index] = scheduler.CurrentTick(); - - VkImage source_image = use_accelerated ? screen_info.image : *raw_images[image_index]; - VkImageView source_image_view = - use_accelerated ? screen_info.image_view : *raw_image_views[image_index]; - - BufferData data; - SetUniformData(data, layout); - SetVertexData(data, framebuffer, layout); - - const std::span<u8> mapped_span = buffer.Mapped(); - std::memcpy(mapped_span.data(), &data, sizeof(data)); - - if (!use_accelerated) { - const u64 image_offset = GetRawImageOffset(framebuffer); - - const DAddr framebuffer_addr = framebuffer.address + framebuffer.offset; - const u8* const host_ptr = device_memory.GetPointer<u8>(framebuffer_addr); - - // TODO(Rodrigo): Read this from HLE - constexpr u32 block_height_log2 = 4; - const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); - const u64 linear_size{GetSizeInBytes(framebuffer)}; - const u64 tiled_size{Tegra::Texture::CalculateSize(true, bytes_per_pixel, - framebuffer.stride, framebuffer.height, - 1, block_height_log2, 0)}; - Tegra::Texture::UnswizzleTexture( - mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size), - bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); - - const VkBufferImageCopy copy{ - .bufferOffset = image_offset, - .bufferRowLength = 0, - .bufferImageHeight = 0, - .imageSubresource = - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1, - }, - .imageOffset = {.x = 0, .y = 0, .z = 0}, - .imageExtent = - { - .width = framebuffer.width, - .height = framebuffer.height, - .depth = 1, - }, - }; - scheduler.Record([this, copy, index = image_index](vk::CommandBuffer cmdbuf) { - const VkImage image = *raw_images[index]; - const VkImageMemoryBarrier base_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = 0, - .dstAccessMask = 0, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, - .subresourceRange{ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; - VkImageMemoryBarrier read_barrier = base_barrier; - read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT; - read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - - VkImageMemoryBarrier write_barrier = base_barrier; - write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, - read_barrier); - cmdbuf.CopyBufferToImage(*buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - 0, write_barrier); - }); + // Recreate the presentation frame if the format or dimensions of the window changed + const VkFormat old_swapchain_view_format = + std::exchange(swapchain_view_format, current_swapchain_view_format); + if (old_swapchain_view_format != current_swapchain_view_format || + layout.width != frame->width || layout.height != frame->height) { + resource_update_required = true; + presentation_recreate_required = true; } - const auto anti_alias_pass = Settings::values.anti_aliasing.GetValue(); - if (use_accelerated && anti_alias_pass == Settings::AntiAliasing::Fxaa) { - UpdateAADescriptorSet(source_image_view, false); - const u32 up_scale = Settings::values.resolution_info.up_scale; - const u32 down_shift = Settings::values.resolution_info.down_shift; - VkExtent2D size{ - .width = (up_scale * framebuffer.width) >> down_shift, - .height = (up_scale * framebuffer.height) >> down_shift, - }; - scheduler.Record([this, index = image_index, size, - anti_alias_pass](vk::CommandBuffer cmdbuf) { - const VkImageMemoryBarrier base_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = 0, - .dstAccessMask = 0, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = {}, - .subresourceRange = - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; - - { - VkImageMemoryBarrier fsr_write_barrier = base_barrier; - fsr_write_barrier.image = *aa_image; - fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, fsr_write_barrier); - } + // If we have a pending resource update, perform it + if (resource_update_required) { + // Wait for idle to ensure no resources are in use + WaitIdle(); - const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; - const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; - const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; - const VkClearValue clear_color{ - .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}}, - }; - const VkRenderPassBeginInfo renderpass_bi{ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .pNext = nullptr, - .renderPass = *aa_renderpass, - .framebuffer = *aa_framebuffer, - .renderArea = - { - .offset = {0, 0}, - .extent = size, - }, - .clearValueCount = 1, - .pClearValues = &clear_color, - }; - const VkViewport viewport{ - .x = 0.0f, - .y = 0.0f, - .width = static_cast<float>(size.width), - .height = static_cast<float>(size.height), - .minDepth = 0.0f, - .maxDepth = 1.0f, - }; - const VkRect2D scissor{ - .offset = {0, 0}, - .extent = size, - }; - cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); - switch (anti_alias_pass) { - case Settings::AntiAliasing::Fxaa: - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline); - break; - default: - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline); - break; - } - cmdbuf.SetViewport(0, viewport); - cmdbuf.SetScissor(0, scissor); + // Update window adapt pass + SetWindowAdaptPass(); - cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *aa_pipeline_layout, 0, - aa_descriptor_sets[index], {}); - cmdbuf.Draw(4, 1, 0, 0); - cmdbuf.EndRenderPass(); - - { - VkImageMemoryBarrier blit_read_barrier = base_barrier; - blit_read_barrier.image = *aa_image; - blit_read_barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, blit_read_barrier); - } - }); - source_image_view = *aa_image_view; - } - if (use_accelerated && anti_alias_pass == Settings::AntiAliasing::Smaa) { - if (!smaa) { - const u32 up_scale = Settings::values.resolution_info.up_scale; - const u32 down_shift = Settings::values.resolution_info.down_shift; - const VkExtent2D smaa_size{ - .width = (up_scale * framebuffer.width) >> down_shift, - .height = (up_scale * framebuffer.height) >> down_shift, - }; - CreateSMAA(smaa_size); + // Update frame format if needed + if (presentation_recreate_required) { + present_manager.RecreateFrame(frame, layout.width, layout.height, swapchain_view_format, + window_adapt->GetRenderPass()); } - source_image_view = smaa->Draw(scheduler, image_index, source_image, source_image_view); } - if (fsr) { - const auto crop_rect = NormalizeCrop(framebuffer, screen_info); - const VkExtent2D fsr_input_size{ - .width = Settings::values.resolution_info.ScaleUp(screen_info.width), - .height = Settings::values.resolution_info.ScaleUp(screen_info.height), - }; - VkImageView fsr_image_view = - fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect); - UpdateDescriptorSet(fsr_image_view, true); - } else { - const bool is_nn = - Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::NearestNeighbor; - UpdateDescriptorSet(source_image_view, is_nn); - } - - scheduler.Record([this, host_framebuffer, index = image_index, - size = render_area](vk::CommandBuffer cmdbuf) { - const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; - const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; - const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; - const VkClearValue clear_color{ - .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}}, - }; - const VkRenderPassBeginInfo renderpass_bi{ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .pNext = nullptr, - .renderPass = *renderpass, - .framebuffer = host_framebuffer, - .renderArea = - { - .offset = {0, 0}, - .extent = size, - }, - .clearValueCount = 1, - .pClearValues = &clear_color, - }; - const VkViewport viewport{ - .x = 0.0f, - .y = 0.0f, - .width = static_cast<float>(size.width), - .height = static_cast<float>(size.height), - .minDepth = 0.0f, - .maxDepth = 1.0f, - }; - const VkRect2D scissor{ - .offset = {0, 0}, - .extent = size, - }; - cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); - auto graphics_pipeline = [this]() { - switch (Settings::values.scaling_filter.GetValue()) { - case Settings::ScalingFilter::NearestNeighbor: - case Settings::ScalingFilter::Bilinear: - return *bilinear_pipeline; - case Settings::ScalingFilter::Bicubic: - return *bicubic_pipeline; - case Settings::ScalingFilter::Gaussian: - return *gaussian_pipeline; - case Settings::ScalingFilter::ScaleForce: - return *scaleforce_pipeline; - default: - return *bilinear_pipeline; - } - }(); - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, graphics_pipeline); - cmdbuf.SetViewport(0, viewport); - cmdbuf.SetScissor(0, scissor); - cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, - descriptor_sets[index], {}); - cmdbuf.Draw(4, 1, 0, 0); - cmdbuf.EndRenderPass(); - }); -} + // Add additional layers if needed + const VkExtent2D window_size{ + .width = layout.screen.GetWidth(), + .height = layout.screen.GetHeight(), + }; -void BlitScreen::DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer, - bool use_accelerated) { - // Recreate dynamic resources if the the image count or input format changed - const VkFormat current_framebuffer_format = - std::exchange(framebuffer_view_format, GetFormat(framebuffer)); - if (const std::size_t swapchain_images = swapchain.GetImageCount(); - swapchain_images != image_count || current_framebuffer_format != framebuffer_view_format) { - image_count = swapchain_images; - Recreate(); + while (layers.size() < framebuffers.size()) { + layers.emplace_back(device, memory_allocator, scheduler, device_memory, image_count, + window_size, window_adapt->GetDescriptorSetLayout()); } - // Recreate the presentation frame if the dimensions of the window changed - const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout(); - if (layout.width != frame->width || layout.height != frame->height) { - Recreate(); - present_manager.RecreateFrame(frame, layout.width, layout.height, swapchain_view_format, - *renderpass); - } + // Perform the draw + window_adapt->Draw(rasterizer, scheduler, image_index, layers, framebuffers, layout, frame); - const VkExtent2D render_area{frame->width, frame->height}; - Draw(framebuffer, *frame->framebuffer, layout, render_area, use_accelerated); + // Advance to next image if (++image_index >= image_count) { image_index = 0; } } -vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) { - return CreateFramebuffer(image_view, extent, renderpass); +vk::Framebuffer BlitScreen::CreateFramebuffer(const Layout::FramebufferLayout& layout, + VkImageView image_view, + VkFormat current_view_format) { + const bool format_updated = + std::exchange(swapchain_view_format, current_view_format) != current_view_format; + if (!window_adapt || scaling_filter != Settings::values.scaling_filter.GetValue() || + format_updated) { + WaitIdle(); + SetWindowAdaptPass(); + } + const VkExtent2D extent{ + .width = layout.width, + .height = layout.height, + }; + return CreateFramebuffer(image_view, extent, window_adapt->GetRenderPass()); } vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent, - vk::RenderPass& rd) { + VkRenderPass render_pass) { return device.GetLogical().CreateFramebuffer(VkFramebufferCreateInfo{ .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, - .renderPass = *rd, + .renderPass = render_pass, .attachmentCount = 1, .pAttachments = &image_view, .width = extent.width, @@ -525,969 +146,4 @@ vk::Framebuffer BlitScreen::CreateFramebuffer(const VkImageView& image_view, VkE }); } -void BlitScreen::CreateStaticResources() { - CreateShaders(); - CreateSampler(); -} - -void BlitScreen::CreateDynamicResources() { - CreateDescriptorPool(); - CreateDescriptorSetLayout(); - CreateDescriptorSets(); - CreatePipelineLayout(); - CreateRenderPass(); - CreateGraphicsPipeline(); - fsr.reset(); - smaa.reset(); - if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { - CreateFSR(); - } -} - -void BlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) { - if (Settings::values.scaling_filter.GetValue() == Settings::ScalingFilter::Fsr) { - if (!fsr) { - CreateFSR(); - } - } else { - fsr.reset(); - } - - if (framebuffer.width == raw_width && framebuffer.height == raw_height && - framebuffer.pixel_format == pixel_format && !raw_images.empty()) { - return; - } - - raw_width = framebuffer.width; - raw_height = framebuffer.height; - pixel_format = framebuffer.pixel_format; - - smaa.reset(); - ReleaseRawImages(); - - CreateStagingBuffer(framebuffer); - CreateRawImages(framebuffer); -} - -void BlitScreen::CreateShaders() { - vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV); - fxaa_vertex_shader = BuildShader(device, FXAA_VERT_SPV); - fxaa_fragment_shader = BuildShader(device, FXAA_FRAG_SPV); - bilinear_fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV); - bicubic_fragment_shader = BuildShader(device, PRESENT_BICUBIC_FRAG_SPV); - gaussian_fragment_shader = BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV); - if (device.IsFloat16Supported()) { - scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP16_FRAG_SPV); - } else { - scaleforce_fragment_shader = BuildShader(device, VULKAN_PRESENT_SCALEFORCE_FP32_FRAG_SPV); - } -} - -void BlitScreen::CreateDescriptorPool() { - const std::array<VkDescriptorPoolSize, 2> pool_sizes{{ - { - .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .descriptorCount = static_cast<u32>(image_count), - }, - { - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = static_cast<u32>(image_count), - }, - }}; - - const std::array<VkDescriptorPoolSize, 1> pool_sizes_aa{{ - { - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = static_cast<u32>(image_count * 2), - }, - }}; - - const VkDescriptorPoolCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .maxSets = static_cast<u32>(image_count), - .poolSizeCount = static_cast<u32>(pool_sizes.size()), - .pPoolSizes = pool_sizes.data(), - }; - descriptor_pool = device.GetLogical().CreateDescriptorPool(ci); - - const VkDescriptorPoolCreateInfo ci_aa{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .maxSets = static_cast<u32>(image_count), - .poolSizeCount = static_cast<u32>(pool_sizes_aa.size()), - .pPoolSizes = pool_sizes_aa.data(), - }; - aa_descriptor_pool = device.GetLogical().CreateDescriptorPool(ci_aa); -} - -void BlitScreen::CreateRenderPass() { - renderpass = CreateRenderPassImpl(swapchain_view_format); -} - -vk::RenderPass BlitScreen::CreateRenderPassImpl(VkFormat format) { - const VkAttachmentDescription color_attachment{ - .flags = 0, - .format = format, - .samples = VK_SAMPLE_COUNT_1_BIT, - .loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR, - .storeOp = VK_ATTACHMENT_STORE_OP_STORE, - .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, - .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .finalLayout = VK_IMAGE_LAYOUT_GENERAL, - }; - - const VkAttachmentReference color_attachment_ref{ - .attachment = 0, - .layout = VK_IMAGE_LAYOUT_GENERAL, - }; - - const VkSubpassDescription subpass_description{ - .flags = 0, - .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, - .inputAttachmentCount = 0, - .pInputAttachments = nullptr, - .colorAttachmentCount = 1, - .pColorAttachments = &color_attachment_ref, - .pResolveAttachments = nullptr, - .pDepthStencilAttachment = nullptr, - .preserveAttachmentCount = 0, - .pPreserveAttachments = nullptr, - }; - - const VkSubpassDependency dependency{ - .srcSubpass = VK_SUBPASS_EXTERNAL, - .dstSubpass = 0, - .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, - .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, - .dependencyFlags = 0, - }; - - const VkRenderPassCreateInfo renderpass_ci{ - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .attachmentCount = 1, - .pAttachments = &color_attachment, - .subpassCount = 1, - .pSubpasses = &subpass_description, - .dependencyCount = 1, - .pDependencies = &dependency, - }; - - return device.GetLogical().CreateRenderPass(renderpass_ci); -} - -void BlitScreen::CreateDescriptorSetLayout() { - const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings{{ - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .pImmutableSamplers = nullptr, - }, - }}; - - const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings_aa{{ - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, - .pImmutableSamplers = nullptr, - }, - { - .binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, - .pImmutableSamplers = nullptr, - }, - }}; - - const VkDescriptorSetLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast<u32>(layout_bindings.size()), - .pBindings = layout_bindings.data(), - }; - - const VkDescriptorSetLayoutCreateInfo ci_aa{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast<u32>(layout_bindings_aa.size()), - .pBindings = layout_bindings_aa.data(), - }; - - descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci); - aa_descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci_aa); -} - -void BlitScreen::CreateDescriptorSets() { - const std::vector layouts(image_count, *descriptor_set_layout); - const std::vector layouts_aa(image_count, *aa_descriptor_set_layout); - - const VkDescriptorSetAllocateInfo ai{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .pNext = nullptr, - .descriptorPool = *descriptor_pool, - .descriptorSetCount = static_cast<u32>(image_count), - .pSetLayouts = layouts.data(), - }; - - const VkDescriptorSetAllocateInfo ai_aa{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .pNext = nullptr, - .descriptorPool = *aa_descriptor_pool, - .descriptorSetCount = static_cast<u32>(image_count), - .pSetLayouts = layouts_aa.data(), - }; - - descriptor_sets = descriptor_pool.Allocate(ai); - aa_descriptor_sets = aa_descriptor_pool.Allocate(ai_aa); -} - -void BlitScreen::CreatePipelineLayout() { - const VkPipelineLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }; - const VkPipelineLayoutCreateInfo ci_aa{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = aa_descriptor_set_layout.address(), - .pushConstantRangeCount = 0, - .pPushConstantRanges = nullptr, - }; - pipeline_layout = device.GetLogical().CreatePipelineLayout(ci); - aa_pipeline_layout = device.GetLogical().CreatePipelineLayout(ci_aa); -} - -void BlitScreen::CreateGraphicsPipeline() { - const std::array<VkPipelineShaderStageCreateInfo, 2> bilinear_shader_stages{{ - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = *vertex_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = *bilinear_fragment_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - }}; - - const std::array<VkPipelineShaderStageCreateInfo, 2> bicubic_shader_stages{{ - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = *vertex_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = *bicubic_fragment_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - }}; - - const std::array<VkPipelineShaderStageCreateInfo, 2> gaussian_shader_stages{{ - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = *vertex_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = *gaussian_fragment_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - }}; - - const std::array<VkPipelineShaderStageCreateInfo, 2> scaleforce_shader_stages{{ - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = *vertex_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = *scaleforce_fragment_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - }}; - - const auto vertex_binding_description = ScreenRectVertex::GetDescription(); - const auto vertex_attrs_description = ScreenRectVertex::GetAttributes(); - - const VkPipelineVertexInputStateCreateInfo vertex_input_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .vertexBindingDescriptionCount = 1, - .pVertexBindingDescriptions = &vertex_binding_description, - .vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()}, - .pVertexAttributeDescriptions = vertex_attrs_description.data(), - }; - - const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .primitiveRestartEnable = VK_FALSE, - }; - - const VkPipelineViewportStateCreateInfo viewport_state_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .viewportCount = 1, - .pViewports = nullptr, - .scissorCount = 1, - .pScissors = nullptr, - }; - - const VkPipelineRasterizationStateCreateInfo rasterization_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .depthClampEnable = VK_FALSE, - .rasterizerDiscardEnable = VK_FALSE, - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_CLOCKWISE, - .depthBiasEnable = VK_FALSE, - .depthBiasConstantFactor = 0.0f, - .depthBiasClamp = 0.0f, - .depthBiasSlopeFactor = 0.0f, - .lineWidth = 1.0f, - }; - - const VkPipelineMultisampleStateCreateInfo multisampling_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, - .sampleShadingEnable = VK_FALSE, - .minSampleShading = 0.0f, - .pSampleMask = nullptr, - .alphaToCoverageEnable = VK_FALSE, - .alphaToOneEnable = VK_FALSE, - }; - - const VkPipelineColorBlendAttachmentState color_blend_attachment{ - .blendEnable = VK_FALSE, - .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, - .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, - .colorBlendOp = VK_BLEND_OP_ADD, - .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, - .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, - .alphaBlendOp = VK_BLEND_OP_ADD, - .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, - }; - - const VkPipelineColorBlendStateCreateInfo color_blend_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .logicOpEnable = VK_FALSE, - .logicOp = VK_LOGIC_OP_COPY, - .attachmentCount = 1, - .pAttachments = &color_blend_attachment, - .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, - }; - - static constexpr std::array dynamic_states{ - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - }; - const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .dynamicStateCount = static_cast<u32>(dynamic_states.size()), - .pDynamicStates = dynamic_states.data(), - }; - - const VkGraphicsPipelineCreateInfo bilinear_pipeline_ci{ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stageCount = static_cast<u32>(bilinear_shader_stages.size()), - .pStages = bilinear_shader_stages.data(), - .pVertexInputState = &vertex_input_ci, - .pInputAssemblyState = &input_assembly_ci, - .pTessellationState = nullptr, - .pViewportState = &viewport_state_ci, - .pRasterizationState = &rasterization_ci, - .pMultisampleState = &multisampling_ci, - .pDepthStencilState = nullptr, - .pColorBlendState = &color_blend_ci, - .pDynamicState = &dynamic_state_ci, - .layout = *pipeline_layout, - .renderPass = *renderpass, - .subpass = 0, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - }; - - const VkGraphicsPipelineCreateInfo bicubic_pipeline_ci{ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stageCount = static_cast<u32>(bicubic_shader_stages.size()), - .pStages = bicubic_shader_stages.data(), - .pVertexInputState = &vertex_input_ci, - .pInputAssemblyState = &input_assembly_ci, - .pTessellationState = nullptr, - .pViewportState = &viewport_state_ci, - .pRasterizationState = &rasterization_ci, - .pMultisampleState = &multisampling_ci, - .pDepthStencilState = nullptr, - .pColorBlendState = &color_blend_ci, - .pDynamicState = &dynamic_state_ci, - .layout = *pipeline_layout, - .renderPass = *renderpass, - .subpass = 0, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - }; - - const VkGraphicsPipelineCreateInfo gaussian_pipeline_ci{ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stageCount = static_cast<u32>(gaussian_shader_stages.size()), - .pStages = gaussian_shader_stages.data(), - .pVertexInputState = &vertex_input_ci, - .pInputAssemblyState = &input_assembly_ci, - .pTessellationState = nullptr, - .pViewportState = &viewport_state_ci, - .pRasterizationState = &rasterization_ci, - .pMultisampleState = &multisampling_ci, - .pDepthStencilState = nullptr, - .pColorBlendState = &color_blend_ci, - .pDynamicState = &dynamic_state_ci, - .layout = *pipeline_layout, - .renderPass = *renderpass, - .subpass = 0, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - }; - - const VkGraphicsPipelineCreateInfo scaleforce_pipeline_ci{ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stageCount = static_cast<u32>(scaleforce_shader_stages.size()), - .pStages = scaleforce_shader_stages.data(), - .pVertexInputState = &vertex_input_ci, - .pInputAssemblyState = &input_assembly_ci, - .pTessellationState = nullptr, - .pViewportState = &viewport_state_ci, - .pRasterizationState = &rasterization_ci, - .pMultisampleState = &multisampling_ci, - .pDepthStencilState = nullptr, - .pColorBlendState = &color_blend_ci, - .pDynamicState = &dynamic_state_ci, - .layout = *pipeline_layout, - .renderPass = *renderpass, - .subpass = 0, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - }; - - bilinear_pipeline = device.GetLogical().CreateGraphicsPipeline(bilinear_pipeline_ci); - bicubic_pipeline = device.GetLogical().CreateGraphicsPipeline(bicubic_pipeline_ci); - gaussian_pipeline = device.GetLogical().CreateGraphicsPipeline(gaussian_pipeline_ci); - scaleforce_pipeline = device.GetLogical().CreateGraphicsPipeline(scaleforce_pipeline_ci); -} - -void BlitScreen::CreateSampler() { - const VkSamplerCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .magFilter = VK_FILTER_LINEAR, - .minFilter = VK_FILTER_LINEAR, - .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, - .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - .mipLodBias = 0.0f, - .anisotropyEnable = VK_FALSE, - .maxAnisotropy = 0.0f, - .compareEnable = VK_FALSE, - .compareOp = VK_COMPARE_OP_NEVER, - .minLod = 0.0f, - .maxLod = 0.0f, - .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK, - .unnormalizedCoordinates = VK_FALSE, - }; - - const VkSamplerCreateInfo ci_nn{ - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .magFilter = VK_FILTER_NEAREST, - .minFilter = VK_FILTER_NEAREST, - .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, - .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - .mipLodBias = 0.0f, - .anisotropyEnable = VK_FALSE, - .maxAnisotropy = 0.0f, - .compareEnable = VK_FALSE, - .compareOp = VK_COMPARE_OP_NEVER, - .minLod = 0.0f, - .maxLod = 0.0f, - .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK, - .unnormalizedCoordinates = VK_FALSE, - }; - - sampler = device.GetLogical().CreateSampler(ci); - nn_sampler = device.GetLogical().CreateSampler(ci_nn); -} - -void BlitScreen::ReleaseRawImages() { - for (const u64 tick : resource_ticks) { - scheduler.Wait(tick); - } - raw_images.clear(); - aa_image_view.reset(); - aa_image.reset(); - buffer.reset(); -} - -void BlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) { - const VkBufferCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .size = CalculateBufferSize(framebuffer), - .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | - VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - }; - - buffer = memory_allocator.CreateBuffer(ci, MemoryUsage::Upload); -} - -void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { - raw_images.resize(image_count); - raw_image_views.resize(image_count); - - const auto create_image = [&](bool used_on_framebuffer = false, u32 up_scale = 1, - u32 down_shift = 0) { - u32 extra_usages = used_on_framebuffer ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT - : VK_IMAGE_USAGE_TRANSFER_DST_BIT; - return memory_allocator.CreateImage(VkImageCreateInfo{ - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .imageType = VK_IMAGE_TYPE_2D, - .format = used_on_framebuffer ? VK_FORMAT_R16G16B16A16_SFLOAT : framebuffer_view_format, - .extent = - { - .width = (up_scale * framebuffer.width) >> down_shift, - .height = (up_scale * framebuffer.height) >> down_shift, - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = VK_SAMPLE_COUNT_1_BIT, - .tiling = used_on_framebuffer ? VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR, - .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | extra_usages, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - }); - }; - const auto create_image_view = [&](vk::Image& image, bool used_on_framebuffer = false) { - return device.GetLogical().CreateImageView(VkImageViewCreateInfo{ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .image = *image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = used_on_framebuffer ? VK_FORMAT_R16G16B16A16_SFLOAT : framebuffer_view_format, - .components = - { - .r = VK_COMPONENT_SWIZZLE_IDENTITY, - .g = VK_COMPONENT_SWIZZLE_IDENTITY, - .b = VK_COMPONENT_SWIZZLE_IDENTITY, - .a = VK_COMPONENT_SWIZZLE_IDENTITY, - }, - .subresourceRange = - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }); - }; - - for (size_t i = 0; i < image_count; ++i) { - raw_images[i] = create_image(); - raw_image_views[i] = create_image_view(raw_images[i]); - } - - // AA Resources - const u32 up_scale = Settings::values.resolution_info.up_scale; - const u32 down_shift = Settings::values.resolution_info.down_shift; - aa_image = create_image(true, up_scale, down_shift); - aa_image_view = create_image_view(aa_image, true); - VkExtent2D size{ - .width = (up_scale * framebuffer.width) >> down_shift, - .height = (up_scale * framebuffer.height) >> down_shift, - }; - if (aa_renderpass) { - aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); - return; - } - aa_renderpass = CreateRenderPassImpl(VK_FORMAT_R16G16B16A16_SFLOAT); - aa_framebuffer = CreateFramebuffer(*aa_image_view, size, aa_renderpass); - - const std::array<VkPipelineShaderStageCreateInfo, 2> fxaa_shader_stages{{ - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_VERTEX_BIT, - .module = *fxaa_vertex_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - { - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_FRAGMENT_BIT, - .module = *fxaa_fragment_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - }}; - - const auto vertex_binding_description = ScreenRectVertex::GetDescription(); - const auto vertex_attrs_description = ScreenRectVertex::GetAttributes(); - - const VkPipelineVertexInputStateCreateInfo vertex_input_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .vertexBindingDescriptionCount = 1, - .pVertexBindingDescriptions = &vertex_binding_description, - .vertexAttributeDescriptionCount = u32{vertex_attrs_description.size()}, - .pVertexAttributeDescriptions = vertex_attrs_description.data(), - }; - - const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, - .primitiveRestartEnable = VK_FALSE, - }; - - const VkPipelineViewportStateCreateInfo viewport_state_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .viewportCount = 1, - .pViewports = nullptr, - .scissorCount = 1, - .pScissors = nullptr, - }; - - const VkPipelineRasterizationStateCreateInfo rasterization_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .depthClampEnable = VK_FALSE, - .rasterizerDiscardEnable = VK_FALSE, - .polygonMode = VK_POLYGON_MODE_FILL, - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_CLOCKWISE, - .depthBiasEnable = VK_FALSE, - .depthBiasConstantFactor = 0.0f, - .depthBiasClamp = 0.0f, - .depthBiasSlopeFactor = 0.0f, - .lineWidth = 1.0f, - }; - - const VkPipelineMultisampleStateCreateInfo multisampling_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, - .sampleShadingEnable = VK_FALSE, - .minSampleShading = 0.0f, - .pSampleMask = nullptr, - .alphaToCoverageEnable = VK_FALSE, - .alphaToOneEnable = VK_FALSE, - }; - - const VkPipelineColorBlendAttachmentState color_blend_attachment{ - .blendEnable = VK_FALSE, - .srcColorBlendFactor = VK_BLEND_FACTOR_ZERO, - .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, - .colorBlendOp = VK_BLEND_OP_ADD, - .srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, - .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, - .alphaBlendOp = VK_BLEND_OP_ADD, - .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | - VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT, - }; - - const VkPipelineColorBlendStateCreateInfo color_blend_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .logicOpEnable = VK_FALSE, - .logicOp = VK_LOGIC_OP_COPY, - .attachmentCount = 1, - .pAttachments = &color_blend_attachment, - .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}, - }; - - static constexpr std::array dynamic_states{ - VK_DYNAMIC_STATE_VIEWPORT, - VK_DYNAMIC_STATE_SCISSOR, - }; - const VkPipelineDynamicStateCreateInfo dynamic_state_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .dynamicStateCount = static_cast<u32>(dynamic_states.size()), - .pDynamicStates = dynamic_states.data(), - }; - - const VkGraphicsPipelineCreateInfo fxaa_pipeline_ci{ - .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stageCount = static_cast<u32>(fxaa_shader_stages.size()), - .pStages = fxaa_shader_stages.data(), - .pVertexInputState = &vertex_input_ci, - .pInputAssemblyState = &input_assembly_ci, - .pTessellationState = nullptr, - .pViewportState = &viewport_state_ci, - .pRasterizationState = &rasterization_ci, - .pMultisampleState = &multisampling_ci, - .pDepthStencilState = nullptr, - .pColorBlendState = &color_blend_ci, - .pDynamicState = &dynamic_state_ci, - .layout = *aa_pipeline_layout, - .renderPass = *aa_renderpass, - .subpass = 0, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - }; - - // AA - aa_pipeline = device.GetLogical().CreateGraphicsPipeline(fxaa_pipeline_ci); -} - -void BlitScreen::UpdateAADescriptorSet(VkImageView image_view, bool nn) const { - const VkDescriptorImageInfo image_info{ - .sampler = nn ? *nn_sampler : *sampler, - .imageView = image_view, - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }; - - const VkWriteDescriptorSet sampler_write{ - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .pNext = nullptr, - .dstSet = aa_descriptor_sets[image_index], - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .pImageInfo = &image_info, - .pBufferInfo = nullptr, - .pTexelBufferView = nullptr, - }; - - const VkWriteDescriptorSet sampler_write_2{ - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .pNext = nullptr, - .dstSet = aa_descriptor_sets[image_index], - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .pImageInfo = &image_info, - .pBufferInfo = nullptr, - .pTexelBufferView = nullptr, - }; - - device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, sampler_write_2}, {}); -} - -void BlitScreen::UpdateDescriptorSet(VkImageView image_view, bool nn) const { - const VkDescriptorBufferInfo buffer_info{ - .buffer = *buffer, - .offset = offsetof(BufferData, uniform), - .range = sizeof(BufferData::uniform), - }; - - const VkWriteDescriptorSet ubo_write{ - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .pNext = nullptr, - .dstSet = descriptor_sets[image_index], - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, - .pImageInfo = nullptr, - .pBufferInfo = &buffer_info, - .pTexelBufferView = nullptr, - }; - - const VkDescriptorImageInfo image_info{ - .sampler = nn ? *nn_sampler : *sampler, - .imageView = image_view, - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }; - - const VkWriteDescriptorSet sampler_write{ - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .pNext = nullptr, - .dstSet = descriptor_sets[image_index], - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .pImageInfo = &image_info, - .pBufferInfo = nullptr, - .pTexelBufferView = nullptr, - }; - - device.GetLogical().UpdateDescriptorSets(std::array{ubo_write, sampler_write}, {}); -} - -void BlitScreen::SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const { - data.uniform.modelview_matrix = - MakeOrthographicMatrix(static_cast<f32>(layout.width), static_cast<f32>(layout.height)); -} - -void BlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, - const Layout::FramebufferLayout layout) const { - f32 left, top, right, bottom; - - if (fsr) { - // FSR has already applied the crop, so we just want to render the image - // it has produced. - left = 0; - top = 0; - right = 1; - bottom = 1; - } else { - // Get the normalized crop rectangle. - const auto crop = NormalizeCrop(framebuffer, screen_info); - - // Apply the crop. - left = crop.left; - top = crop.top; - right = crop.right; - bottom = crop.bottom; - } - - // Map the coordinates to the screen. - const auto& screen = layout.screen; - const auto x = static_cast<f32>(screen.left); - const auto y = static_cast<f32>(screen.top); - const auto w = static_cast<f32>(screen.GetWidth()); - const auto h = static_cast<f32>(screen.GetHeight()); - - data.vertices[0] = ScreenRectVertex(x, y, left, top); - data.vertices[1] = ScreenRectVertex(x + w, y, right, top); - data.vertices[2] = ScreenRectVertex(x, y + h, left, bottom); - data.vertices[3] = ScreenRectVertex(x + w, y + h, right, bottom); -} - -void BlitScreen::CreateSMAA(VkExtent2D smaa_size) { - smaa = std::make_unique<SMAA>(device, memory_allocator, image_count, smaa_size); -} - -void BlitScreen::CreateFSR() { - const auto& layout = render_window.GetFramebufferLayout(); - const VkExtent2D fsr_size{ - .width = layout.screen.GetWidth(), - .height = layout.screen.GetHeight(), - }; - fsr = std::make_unique<FSR>(device, memory_allocator, image_count, fsr_size); -} - -u64 BlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const { - return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count; -} - -u64 BlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const { - constexpr auto first_image_offset = static_cast<u64>(sizeof(BufferData)); - return first_image_offset + GetSizeInBytes(framebuffer) * image_index; -} - } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h index 3eff76009..cbdf2d5d0 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.h +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -3,10 +3,12 @@ #pragma once +#include <list> #include <memory> #include "core/frontend/framebuffer_layout.h" #include "video_core/host1x/gpu_device_memory_manager.h" +#include "video_core/renderer_vulkan/present/layer.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -14,155 +16,67 @@ namespace Core { class System; } -namespace Core::Frontend { -class EmuWindow; -} - namespace Tegra { struct FramebufferConfig; } -namespace VideoCore { -class RasterizerInterface; -} - -namespace Service::android { -enum class PixelFormat : u32; -} +namespace Settings { +enum class ScalingFilter : u32; +} // namespace Settings namespace Vulkan { -struct ScreenInfo; - class Device; -class FSR; class RasterizerVulkan; class Scheduler; -class SMAA; -class Swapchain; class PresentManager; +class WindowAdaptPass; struct Frame; -struct ScreenInfo { +struct FramebufferTextureInfo { VkImage image{}; VkImageView image_view{}; u32 width{}; u32 height{}; + u32 scaled_width{}; + u32 scaled_height{}; }; class BlitScreen { public: - explicit BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory, - Core::Frontend::EmuWindow& render_window, const Device& device, - MemoryAllocator& memory_manager, Swapchain& swapchain, - PresentManager& present_manager, Scheduler& scheduler, - const ScreenInfo& screen_info); + explicit BlitScreen(Tegra::MaxwellDeviceMemoryManager& device_memory, const Device& device, + MemoryAllocator& memory_allocator, PresentManager& present_manager, + Scheduler& scheduler); ~BlitScreen(); - void Recreate(); - - void Draw(const Tegra::FramebufferConfig& framebuffer, const VkFramebuffer& host_framebuffer, - const Layout::FramebufferLayout layout, VkExtent2D render_area, bool use_accelerated); - - void DrawToSwapchain(Frame* frame, const Tegra::FramebufferConfig& framebuffer, - bool use_accelerated); + void DrawToFrame(RasterizerVulkan& rasterizer, Frame* frame, + std::span<const Tegra::FramebufferConfig> framebuffers, + const Layout::FramebufferLayout& layout, size_t current_swapchain_image_count, + VkFormat current_swapchain_view_format); - [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, - VkExtent2D extent); - - [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, - VkExtent2D extent, vk::RenderPass& rd); + [[nodiscard]] vk::Framebuffer CreateFramebuffer(const Layout::FramebufferLayout& layout, + VkImageView image_view, + VkFormat current_view_format); private: - struct BufferData; - - void CreateStaticResources(); - void CreateShaders(); - void CreateDescriptorPool(); - void CreateRenderPass(); - vk::RenderPass CreateRenderPassImpl(VkFormat format); - void CreateDescriptorSetLayout(); - void CreateDescriptorSets(); - void CreatePipelineLayout(); - void CreateGraphicsPipeline(); - void CreateSampler(); - - void CreateDynamicResources(); - - void RefreshResources(const Tegra::FramebufferConfig& framebuffer); - void ReleaseRawImages(); - void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); - void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); - - void UpdateDescriptorSet(VkImageView image_view, bool nn) const; - void UpdateAADescriptorSet(VkImageView image_view, bool nn) const; - void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const; - void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, - const Layout::FramebufferLayout layout) const; - - void CreateSMAA(VkExtent2D smaa_size); - void CreateFSR(); - - u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; - u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer) const; + void WaitIdle(); + void SetWindowAdaptPass(); + vk::Framebuffer CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent, + VkRenderPass render_pass); Tegra::MaxwellDeviceMemoryManager& device_memory; - Core::Frontend::EmuWindow& render_window; const Device& device; MemoryAllocator& memory_allocator; - Swapchain& swapchain; PresentManager& present_manager; Scheduler& scheduler; - std::size_t image_count; + std::size_t image_count{}; std::size_t image_index{}; - const ScreenInfo& screen_info; - - vk::ShaderModule vertex_shader; - vk::ShaderModule fxaa_vertex_shader; - vk::ShaderModule fxaa_fragment_shader; - vk::ShaderModule bilinear_fragment_shader; - vk::ShaderModule bicubic_fragment_shader; - vk::ShaderModule gaussian_fragment_shader; - vk::ShaderModule scaleforce_fragment_shader; - vk::DescriptorPool descriptor_pool; - vk::DescriptorSetLayout descriptor_set_layout; - vk::PipelineLayout pipeline_layout; - vk::Pipeline nearest_neighbor_pipeline; - vk::Pipeline bilinear_pipeline; - vk::Pipeline bicubic_pipeline; - vk::Pipeline gaussian_pipeline; - vk::Pipeline scaleforce_pipeline; - vk::RenderPass renderpass; - vk::DescriptorSets descriptor_sets; - vk::Sampler nn_sampler; - vk::Sampler sampler; - - vk::Buffer buffer; - - std::vector<u64> resource_ticks; - - std::vector<vk::Image> raw_images; - std::vector<vk::ImageView> raw_image_views; - - vk::DescriptorPool aa_descriptor_pool; - vk::DescriptorSetLayout aa_descriptor_set_layout; - vk::PipelineLayout aa_pipeline_layout; - vk::Pipeline aa_pipeline; - vk::RenderPass aa_renderpass; - vk::Framebuffer aa_framebuffer; - vk::DescriptorSets aa_descriptor_sets; - vk::Image aa_image; - vk::ImageView aa_image_view; - - u32 raw_width = 0; - u32 raw_height = 0; - Service::android::PixelFormat pixel_format{}; - VkFormat framebuffer_view_format; - VkFormat swapchain_view_format; - - std::unique_ptr<FSR> fsr; - std::unique_ptr<SMAA> smaa; + VkFormat swapchain_view_format{}; + + Settings::ScalingFilter scaling_filter{}; + std::unique_ptr<WindowAdaptPass> window_adapt{}; + std::list<Layer> layers{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp deleted file mode 100644 index f7a05fbc0..000000000 --- a/src/video_core/renderer_vulkan/vk_fsr.cpp +++ /dev/null @@ -1,420 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "common/common_types.h" -#include "common/div_ceil.h" -#include "common/settings.h" - -#include "video_core/fsr.h" -#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp16_comp_spv.h" -#include "video_core/host_shaders/vulkan_fidelityfx_fsr_easu_fp32_comp_spv.h" -#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp16_comp_spv.h" -#include "video_core/host_shaders/vulkan_fidelityfx_fsr_rcas_fp32_comp_spv.h" -#include "video_core/renderer_vulkan/vk_fsr.h" -#include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_shader_util.h" -#include "video_core/vulkan_common/vulkan_device.h" - -namespace Vulkan { -using namespace FSR; - -FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image_count_, - VkExtent2D output_size_) - : device{device_}, memory_allocator{memory_allocator_}, image_count{image_count_}, - output_size{output_size_} { - - CreateImages(); - CreateSampler(); - CreateShaders(); - CreateDescriptorPool(); - CreateDescriptorSetLayout(); - CreateDescriptorSets(); - CreatePipelineLayout(); - CreatePipeline(); -} - -VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view, - VkExtent2D input_image_extent, const Common::Rectangle<f32>& crop_rect) { - - UpdateDescriptorSet(image_index, image_view); - - scheduler.Record([this, image_index, input_image_extent, crop_rect](vk::CommandBuffer cmdbuf) { - const VkImageMemoryBarrier base_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = 0, - .dstAccessMask = 0, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = {}, - .subresourceRange = - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; - - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline); - - const f32 input_image_width = static_cast<f32>(input_image_extent.width); - const f32 input_image_height = static_cast<f32>(input_image_extent.height); - const f32 output_image_width = static_cast<f32>(output_size.width); - const f32 output_image_height = static_cast<f32>(output_size.height); - const f32 viewport_width = (crop_rect.right - crop_rect.left) * input_image_width; - const f32 viewport_x = crop_rect.left * input_image_width; - const f32 viewport_height = (crop_rect.bottom - crop_rect.top) * input_image_height; - const f32 viewport_y = crop_rect.top * input_image_height; - - std::array<u32, 4 * 4> push_constants; - FsrEasuConOffset(push_constants.data() + 0, push_constants.data() + 4, - push_constants.data() + 8, push_constants.data() + 12, - - viewport_width, viewport_height, input_image_width, input_image_height, - output_image_width, output_image_height, viewport_x, viewport_y); - cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); - - { - VkImageMemoryBarrier fsr_write_barrier = base_barrier; - fsr_write_barrier.image = *images[image_index]; - fsr_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, fsr_write_barrier); - } - - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, - descriptor_sets[image_index * 2], {}); - cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u), - Common::DivCeil(output_size.height, 16u), 1); - - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *rcas_pipeline); - - const float sharpening = - static_cast<float>(Settings::values.fsr_sharpening_slider.GetValue()) / 100.0f; - - FsrRcasCon(push_constants.data(), sharpening); - cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); - - { - std::array<VkImageMemoryBarrier, 2> barriers; - auto& fsr_read_barrier = barriers[0]; - auto& blit_write_barrier = barriers[1]; - - fsr_read_barrier = base_barrier; - fsr_read_barrier.image = *images[image_index]; - fsr_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - fsr_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - - blit_write_barrier = base_barrier; - blit_write_barrier.image = *images[image_count + image_index]; - blit_write_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - blit_write_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; - - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, {}, {}, barriers); - } - - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, - descriptor_sets[image_index * 2 + 1], {}); - cmdbuf.Dispatch(Common::DivCeil(output_size.width, 16u), - Common::DivCeil(output_size.height, 16u), 1); - - { - std::array<VkImageMemoryBarrier, 1> barriers; - auto& blit_read_barrier = barriers[0]; - - blit_read_barrier = base_barrier; - blit_read_barrier.image = *images[image_count + image_index]; - blit_read_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - blit_read_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, {}, {}, barriers); - } - }); - - return *image_views[image_count + image_index]; -} - -void FSR::CreateDescriptorPool() { - const std::array<VkDescriptorPoolSize, 2> pool_sizes{{ - { - .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = static_cast<u32>(image_count * 2), - }, - { - .type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = static_cast<u32>(image_count * 2), - }, - }}; - - const VkDescriptorPoolCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .maxSets = static_cast<u32>(image_count * 2), - .poolSizeCount = static_cast<u32>(pool_sizes.size()), - .pPoolSizes = pool_sizes.data(), - }; - descriptor_pool = device.GetLogical().CreateDescriptorPool(ci); -} - -void FSR::CreateDescriptorSetLayout() { - const std::array<VkDescriptorSetLayoutBinding, 2> layout_bindings{{ - { - .binding = 0, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = sampler.address(), - }, - { - .binding = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .pImmutableSamplers = sampler.address(), - }, - }}; - - const VkDescriptorSetLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .bindingCount = static_cast<u32>(layout_bindings.size()), - .pBindings = layout_bindings.data(), - }; - - descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout(ci); -} - -void FSR::CreateDescriptorSets() { - const u32 sets = static_cast<u32>(image_count * 2); - const std::vector layouts(sets, *descriptor_set_layout); - - const VkDescriptorSetAllocateInfo ai{ - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .pNext = nullptr, - .descriptorPool = *descriptor_pool, - .descriptorSetCount = sets, - .pSetLayouts = layouts.data(), - }; - - descriptor_sets = descriptor_pool.Allocate(ai); -} - -void FSR::CreateImages() { - images.resize(image_count * 2); - image_views.resize(image_count * 2); - - for (size_t i = 0; i < image_count * 2; ++i) { - images[i] = memory_allocator.CreateImage(VkImageCreateInfo{ - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .imageType = VK_IMAGE_TYPE_2D, - .format = VK_FORMAT_R16G16B16A16_SFLOAT, - .extent = - { - .width = output_size.width, - .height = output_size.height, - .depth = 1, - }, - .mipLevels = 1, - .arrayLayers = 1, - .samples = VK_SAMPLE_COUNT_1_BIT, - .tiling = VK_IMAGE_TILING_OPTIMAL, - .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_STORAGE_BIT | - VK_IMAGE_USAGE_SAMPLED_BIT, - .sharingMode = VK_SHARING_MODE_EXCLUSIVE, - .queueFamilyIndexCount = 0, - .pQueueFamilyIndices = nullptr, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - }); - image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{ - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .image = *images[i], - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = VK_FORMAT_R16G16B16A16_SFLOAT, - .components = - { - .r = VK_COMPONENT_SWIZZLE_IDENTITY, - .g = VK_COMPONENT_SWIZZLE_IDENTITY, - .b = VK_COMPONENT_SWIZZLE_IDENTITY, - .a = VK_COMPONENT_SWIZZLE_IDENTITY, - }, - .subresourceRange = - { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }); - } -} - -void FSR::CreatePipelineLayout() { - VkPushConstantRange push_const{ - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, - .offset = 0, - .size = sizeof(std::array<u32, 4 * 4>), - }; - VkPipelineLayoutCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .setLayoutCount = 1, - .pSetLayouts = descriptor_set_layout.address(), - .pushConstantRangeCount = 1, - .pPushConstantRanges = &push_const, - }; - - pipeline_layout = device.GetLogical().CreatePipelineLayout(ci); -} - -void FSR::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const { - const auto fsr_image_view = *image_views[image_index]; - const auto blit_image_view = *image_views[image_count + image_index]; - - const VkDescriptorImageInfo image_info{ - .sampler = VK_NULL_HANDLE, - .imageView = image_view, - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }; - const VkDescriptorImageInfo fsr_image_info{ - .sampler = VK_NULL_HANDLE, - .imageView = fsr_image_view, - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }; - const VkDescriptorImageInfo blit_image_info{ - .sampler = VK_NULL_HANDLE, - .imageView = blit_image_view, - .imageLayout = VK_IMAGE_LAYOUT_GENERAL, - }; - - VkWriteDescriptorSet sampler_write{ - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .pNext = nullptr, - .dstSet = descriptor_sets[image_index * 2], - .dstBinding = 0, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .pImageInfo = &image_info, - .pBufferInfo = nullptr, - .pTexelBufferView = nullptr, - }; - - VkWriteDescriptorSet output_write{ - .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .pNext = nullptr, - .dstSet = descriptor_sets[image_index * 2], - .dstBinding = 1, - .dstArrayElement = 0, - .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, - .pImageInfo = &fsr_image_info, - .pBufferInfo = nullptr, - .pTexelBufferView = nullptr, - }; - - device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, output_write}, {}); - - sampler_write.dstSet = descriptor_sets[image_index * 2 + 1]; - sampler_write.pImageInfo = &fsr_image_info; - output_write.dstSet = descriptor_sets[image_index * 2 + 1]; - output_write.pImageInfo = &blit_image_info; - - device.GetLogical().UpdateDescriptorSets(std::array{sampler_write, output_write}, {}); -} - -void FSR::CreateSampler() { - const VkSamplerCreateInfo ci{ - .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .magFilter = VK_FILTER_LINEAR, - .minFilter = VK_FILTER_LINEAR, - .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR, - .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, - .mipLodBias = 0.0f, - .anisotropyEnable = VK_FALSE, - .maxAnisotropy = 0.0f, - .compareEnable = VK_FALSE, - .compareOp = VK_COMPARE_OP_NEVER, - .minLod = 0.0f, - .maxLod = 0.0f, - .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK, - .unnormalizedCoordinates = VK_FALSE, - }; - - sampler = device.GetLogical().CreateSampler(ci); -} - -void FSR::CreateShaders() { - if (device.IsFloat16Supported()) { - easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP16_COMP_SPV); - rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP16_COMP_SPV); - } else { - easu_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_EASU_FP32_COMP_SPV); - rcas_shader = BuildShader(device, VULKAN_FIDELITYFX_FSR_RCAS_FP32_COMP_SPV); - } -} - -void FSR::CreatePipeline() { - VkPipelineShaderStageCreateInfo shader_stage_easu{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *easu_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }; - - VkPipelineShaderStageCreateInfo shader_stage_rcas{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *rcas_shader, - .pName = "main", - .pSpecializationInfo = nullptr, - }; - - VkComputePipelineCreateInfo pipeline_ci_easu{ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = shader_stage_easu, - .layout = *pipeline_layout, - .basePipelineHandle = VK_NULL_HANDLE, - .basePipelineIndex = 0, - }; - - VkComputePipelineCreateInfo pipeline_ci_rcas{ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage = shader_stage_rcas, - .layout = *pipeline_layout, - .basePipelineHandle = VK_NULL_HANDLE, - .basePipelineIndex = 0, - }; - - easu_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci_easu); - rcas_pipeline = device.GetLogical().CreateComputePipeline(pipeline_ci_rcas); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h deleted file mode 100644 index 3505c1416..000000000 --- a/src/video_core/renderer_vulkan/vk_fsr.h +++ /dev/null @@ -1,52 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include "common/math_util.h" -#include "video_core/vulkan_common/vulkan_memory_allocator.h" -#include "video_core/vulkan_common/vulkan_wrapper.h" - -namespace Vulkan { - -class Device; -class Scheduler; - -class FSR { -public: - explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, - VkExtent2D output_size); - VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view, - VkExtent2D input_image_extent, const Common::Rectangle<f32>& crop_rect); - -private: - void CreateDescriptorPool(); - void CreateDescriptorSetLayout(); - void CreateDescriptorSets(); - void CreateImages(); - void CreateSampler(); - void CreateShaders(); - void CreatePipeline(); - void CreatePipelineLayout(); - - void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const; - - const Device& device; - MemoryAllocator& memory_allocator; - size_t image_count; - VkExtent2D output_size; - - vk::DescriptorPool descriptor_pool; - vk::DescriptorSetLayout descriptor_set_layout; - vk::DescriptorSets descriptor_sets; - vk::PipelineLayout pipeline_layout; - vk::ShaderModule easu_shader; - vk::ShaderModule rcas_shader; - vk::Pipeline easu_pipeline; - vk::Pipeline rcas_pipeline; - vk::Sampler sampler; - std::vector<vk::Image> images; - std::vector<vk::ImageView> image_views; -}; - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 5bf41b81f..aa0a027bb 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -165,10 +165,9 @@ DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::MaxwellDeviceMemoryManager& device_memory_, - ScreenInfo& screen_info_, const Device& device_, - MemoryAllocator& memory_allocator_, StateTracker& state_tracker_, - Scheduler& scheduler_) - : gpu{gpu_}, device_memory{device_memory_}, screen_info{screen_info_}, device{device_}, + const Device& device_, MemoryAllocator& memory_allocator_, + StateTracker& state_tracker_, Scheduler& scheduler_) + : gpu{gpu_}, device_memory{device_memory_}, device{device_}, memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_}, staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler), guest_descriptor_queue(device, scheduler), compute_pass_descriptor_queue(device, scheduler), @@ -783,23 +782,29 @@ void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_si query_cache.InvalidateRegion(*cpu_addr, copy_size); } -bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, - DAddr framebuffer_addr, u32 pixel_stride) { +std::optional<FramebufferTextureInfo> RasterizerVulkan::AccelerateDisplay( + const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, u32 pixel_stride) { if (!framebuffer_addr) { - return false; + return {}; } std::scoped_lock lock{texture_cache.mutex}; - ImageView* const image_view = + const auto [image_view, scaled] = texture_cache.TryFindFramebufferImageView(config, framebuffer_addr); if (!image_view) { - return false; + return {}; } query_cache.NotifySegment(false); - screen_info.image = image_view->ImageHandle(); - screen_info.image_view = image_view->Handle(Shader::TextureType::Color2D); - screen_info.width = image_view->size.width; - screen_info.height = image_view->size.height; - return true; + + const auto& resolution = Settings::values.resolution_info; + + FramebufferTextureInfo info{}; + info.image = image_view->ImageHandle(); + info.image_view = image_view->Handle(Shader::TextureType::Color2D); + info.width = image_view->size.width; + info.height = image_view->size.height; + info.scaled_width = scaled ? resolution.ScaleUp(info.width) : info.width; + info.scaled_height = scaled ? resolution.ScaleUp(info.height) : info.height; + return info; } void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_loading, diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 881ee0993..0617b37f0 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -43,7 +43,7 @@ class Maxwell3D; namespace Vulkan { -struct ScreenInfo; +struct FramebufferTextureInfo; class StateTracker; @@ -78,9 +78,8 @@ class RasterizerVulkan final : public VideoCore::RasterizerInterface, public: explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, Tegra::MaxwellDeviceMemoryManager& device_memory_, - ScreenInfo& screen_info_, const Device& device_, - MemoryAllocator& memory_allocator_, StateTracker& state_tracker_, - Scheduler& scheduler_); + const Device& device_, MemoryAllocator& memory_allocator_, + StateTracker& state_tracker_, Scheduler& scheduler_); ~RasterizerVulkan() override; void Draw(bool is_indexed, u32 instance_count) override; @@ -126,8 +125,6 @@ public: Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; void AccelerateInlineToMemory(GPUVAddr address, size_t copy_size, std::span<const u8> memory) override; - bool AccelerateDisplay(const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, - u32 pixel_stride) override; void LoadDiskResources(u64 title_id, std::stop_token stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; @@ -137,6 +134,10 @@ public: void ReleaseChannel(s32 channel_id) override; + std::optional<FramebufferTextureInfo> AccelerateDisplay(const Tegra::FramebufferConfig& config, + VAddr framebuffer_addr, + u32 pixel_stride); + private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; @@ -182,7 +183,6 @@ private: Tegra::GPU& gpu; Tegra::MaxwellDeviceMemoryManager& device_memory; - ScreenInfo& screen_info; const Device& device; MemoryAllocator& memory_allocator; StateTracker& state_tracker; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a7400adfa..a20c956ff 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -713,12 +713,12 @@ bool TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, } template <class P> -typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView( +std::pair<typename P::ImageView*, bool> TextureCache<P>::TryFindFramebufferImageView( const Tegra::FramebufferConfig& config, DAddr cpu_addr) { // TODO: Properly implement this const auto it = page_table.find(cpu_addr >> YUZU_PAGEBITS); if (it == page_table.end()) { - return nullptr; + return {}; } const auto& image_map_ids = it->second; boost::container::small_vector<ImageId, 4> valid_image_ids; @@ -747,7 +747,8 @@ typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView( const auto GetImageViewForFramebuffer = [&](ImageId image_id) { const ImageViewInfo info{ImageViewType::e2D, view_format}; - return &slot_image_views[FindOrEmplaceImageView(image_id, info)]; + return std::make_pair(&slot_image_views[FindOrEmplaceImageView(image_id, info)], + slot_images[image_id].IsRescaled()); }; if (valid_image_ids.size() == 1) [[likely]] { @@ -761,7 +762,7 @@ typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView( return GetImageViewForFramebuffer(*most_recent); } - return nullptr; + return {}; } template <class P> diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index f9aebb293..e7b910121 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -212,8 +212,8 @@ public: const Tegra::Engines::Fermi2D::Config& copy); /// Try to find a cached image view in the given CPU address - [[nodiscard]] ImageView* TryFindFramebufferImageView(const Tegra::FramebufferConfig& config, - DAddr cpu_addr); + [[nodiscard]] std::pair<ImageView*, bool> TryFindFramebufferImageView( + const Tegra::FramebufferConfig& config, DAddr cpu_addr); /// Return true when there are uncommitted images to be downloaded [[nodiscard]] bool HasUncommittedFlushes() const noexcept; |