diff options
25 files changed, 260 insertions, 56 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2da983cad..7bb88c8ea 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -134,7 +134,7 @@ else() endif() # GCC bugs - if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "12" AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "11" AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU") # These diagnostics would be great if they worked, but are just completely broken # and produce bogus errors on external libraries like fmt. add_compile_options( diff --git a/src/core/hle/kernel/k_hardware_timer.h b/src/core/hle/kernel/k_hardware_timer.h index 00bef6ea1..27f43cd19 100644 --- a/src/core/hle/kernel/k_hardware_timer.h +++ b/src/core/hle/kernel/k_hardware_timer.h @@ -19,13 +19,7 @@ public: void Initialize(); void Finalize(); - s64 GetCount() const { - return GetTick(); - } - - void RegisterTask(KTimerTask* task, s64 time_from_now) { - this->RegisterAbsoluteTask(task, GetTick() + time_from_now); - } + s64 GetTick() const; void RegisterAbsoluteTask(KTimerTask* task, s64 task_time) { KScopedDisableDispatch dd{m_kernel}; @@ -42,7 +36,6 @@ private: void EnableInterrupt(s64 wakeup_time); void DisableInterrupt(); bool GetInterruptEnabled(); - s64 GetTick() const; void DoTask(); private: diff --git a/src/core/hle/kernel/k_resource_limit.cpp b/src/core/hle/kernel/k_resource_limit.cpp index fcee26a29..d8a63aaf8 100644 --- a/src/core/hle/kernel/k_resource_limit.cpp +++ b/src/core/hle/kernel/k_resource_limit.cpp @@ -5,6 +5,7 @@ #include "common/overflow.h" #include "core/core.h" #include "core/core_timing.h" +#include "core/hle/kernel/k_hardware_timer.h" #include "core/hle/kernel/k_resource_limit.h" #include "core/hle/kernel/svc_results.h" @@ -15,9 +16,7 @@ KResourceLimit::KResourceLimit(KernelCore& kernel) : KAutoObjectWithSlabHeapAndContainer{kernel}, m_lock{m_kernel}, m_cond_var{m_kernel} {} KResourceLimit::~KResourceLimit() = default; -void KResourceLimit::Initialize(const Core::Timing::CoreTiming* core_timing) { - m_core_timing = core_timing; -} +void KResourceLimit::Initialize() {} void KResourceLimit::Finalize() {} @@ -86,7 +85,7 @@ Result KResourceLimit::SetLimitValue(LimitableResource which, s64 value) { } bool KResourceLimit::Reserve(LimitableResource which, s64 value) { - return Reserve(which, value, m_core_timing->GetGlobalTimeNs().count() + DefaultTimeout); + return Reserve(which, value, m_kernel.HardwareTimer().GetTick() + DefaultTimeout); } bool KResourceLimit::Reserve(LimitableResource which, s64 value, s64 timeout) { @@ -117,7 +116,7 @@ bool KResourceLimit::Reserve(LimitableResource which, s64 value, s64 timeout) { } if (m_current_hints[index] + value <= m_limit_values[index] && - (timeout < 0 || m_core_timing->GetGlobalTimeNs().count() < timeout)) { + (timeout < 0 || m_kernel.HardwareTimer().GetTick() < timeout)) { m_waiter_count++; m_cond_var.Wait(std::addressof(m_lock), timeout, false); m_waiter_count--; @@ -154,7 +153,7 @@ void KResourceLimit::Release(LimitableResource which, s64 value, s64 hint) { KResourceLimit* CreateResourceLimitForProcess(Core::System& system, s64 physical_memory_size) { auto* resource_limit = KResourceLimit::Create(system.Kernel()); - resource_limit->Initialize(std::addressof(system.CoreTiming())); + resource_limit->Initialize(); // Initialize default resource limit values. // TODO(bunnei): These values are the system defaults, the limits for service processes are diff --git a/src/core/hle/kernel/k_resource_limit.h b/src/core/hle/kernel/k_resource_limit.h index 15e69af56..b733ec8f8 100644 --- a/src/core/hle/kernel/k_resource_limit.h +++ b/src/core/hle/kernel/k_resource_limit.h @@ -31,7 +31,7 @@ public: explicit KResourceLimit(KernelCore& kernel); ~KResourceLimit() override; - void Initialize(const Core::Timing::CoreTiming* core_timing); + void Initialize(); void Finalize() override; s64 GetLimitValue(LimitableResource which) const; @@ -57,7 +57,6 @@ private: mutable KLightLock m_lock; s32 m_waiter_count{}; KLightConditionVariable m_cond_var; - const Core::Timing::CoreTiming* m_core_timing{}; }; KResourceLimit* CreateResourceLimitForProcess(Core::System& system, s64 physical_memory_size); diff --git a/src/core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h b/src/core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h index c485022f5..b62415da7 100644 --- a/src/core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h +++ b/src/core/hle/kernel/k_scoped_scheduler_lock_and_sleep.h @@ -28,7 +28,7 @@ public: ~KScopedSchedulerLockAndSleep() { // Register the sleep. if (m_timeout_tick > 0) { - m_timer->RegisterTask(m_thread, m_timeout_tick); + m_timer->RegisterAbsoluteTask(m_thread, m_timeout_tick); } // Unlock the scheduler. diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index ebe7582c6..a1134b7e2 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -231,7 +231,7 @@ struct KernelCore::Impl { void InitializeSystemResourceLimit(KernelCore& kernel, const Core::Timing::CoreTiming& core_timing) { system_resource_limit = KResourceLimit::Create(system.Kernel()); - system_resource_limit->Initialize(&core_timing); + system_resource_limit->Initialize(); KResourceLimit::Register(kernel, system_resource_limit); const auto sizes{memory_layout->GetTotalAndKernelMemorySizes()}; diff --git a/src/core/hle/kernel/svc/svc_address_arbiter.cpp b/src/core/hle/kernel/svc/svc_address_arbiter.cpp index 04cc5ea64..90ee43521 100644 --- a/src/core/hle/kernel/svc/svc_address_arbiter.cpp +++ b/src/core/hle/kernel/svc/svc_address_arbiter.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "core/core.h" +#include "core/hle/kernel/k_hardware_timer.h" #include "core/hle/kernel/k_memory_layout.h" #include "core/hle/kernel/k_process.h" #include "core/hle/kernel/kernel.h" @@ -52,7 +53,7 @@ Result WaitForAddress(Core::System& system, u64 address, ArbitrationType arb_typ if (timeout_ns > 0) { const s64 offset_tick(timeout_ns); if (offset_tick > 0) { - timeout = offset_tick + 2; + timeout = system.Kernel().HardwareTimer().GetTick() + offset_tick + 2; if (timeout <= 0) { timeout = std::numeric_limits<s64>::max(); } diff --git a/src/core/hle/kernel/svc/svc_condition_variable.cpp b/src/core/hle/kernel/svc/svc_condition_variable.cpp index ca120d67e..bb678e6c5 100644 --- a/src/core/hle/kernel/svc/svc_condition_variable.cpp +++ b/src/core/hle/kernel/svc/svc_condition_variable.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "core/core.h" +#include "core/hle/kernel/k_hardware_timer.h" #include "core/hle/kernel/k_memory_layout.h" #include "core/hle/kernel/k_process.h" #include "core/hle/kernel/kernel.h" @@ -25,7 +26,7 @@ Result WaitProcessWideKeyAtomic(Core::System& system, u64 address, u64 cv_key, u if (timeout_ns > 0) { const s64 offset_tick(timeout_ns); if (offset_tick > 0) { - timeout = offset_tick + 2; + timeout = system.Kernel().HardwareTimer().GetTick() + offset_tick + 2; if (timeout <= 0) { timeout = std::numeric_limits<s64>::max(); } diff --git a/src/core/hle/kernel/svc/svc_ipc.cpp b/src/core/hle/kernel/svc/svc_ipc.cpp index 373ae7c8d..6b5e1cb8d 100644 --- a/src/core/hle/kernel/svc/svc_ipc.cpp +++ b/src/core/hle/kernel/svc/svc_ipc.cpp @@ -5,6 +5,7 @@ #include "common/scratch_buffer.h" #include "core/core.h" #include "core/hle/kernel/k_client_session.h" +#include "core/hle/kernel/k_hardware_timer.h" #include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_server_session.h" #include "core/hle/kernel/svc.h" @@ -82,12 +83,29 @@ Result ReplyAndReceive(Core::System& system, s32* out_index, uint64_t handles_ad R_TRY(session->SendReply()); } + // Convert the timeout from nanoseconds to ticks. + // NOTE: Nintendo does not use this conversion logic in WaitSynchronization... + s64 timeout; + if (timeout_ns > 0) { + const s64 offset_tick(timeout_ns); + if (offset_tick > 0) { + timeout = kernel.HardwareTimer().GetTick() + offset_tick + 2; + if (timeout <= 0) { + timeout = std::numeric_limits<s64>::max(); + } + } else { + timeout = std::numeric_limits<s64>::max(); + } + } else { + timeout = timeout_ns; + } + // Wait for a message. while (true) { // Wait for an object. s32 index; Result result = KSynchronizationObject::Wait(kernel, std::addressof(index), objs.data(), - num_handles, timeout_ns); + num_handles, timeout); if (result == ResultTimedOut) { R_RETURN(result); } diff --git a/src/core/hle/kernel/svc/svc_resource_limit.cpp b/src/core/hle/kernel/svc/svc_resource_limit.cpp index 732bc017e..c8e820b6a 100644 --- a/src/core/hle/kernel/svc/svc_resource_limit.cpp +++ b/src/core/hle/kernel/svc/svc_resource_limit.cpp @@ -21,7 +21,7 @@ Result CreateResourceLimit(Core::System& system, Handle* out_handle) { SCOPE_EXIT({ resource_limit->Close(); }); // Initialize the resource limit. - resource_limit->Initialize(std::addressof(system.CoreTiming())); + resource_limit->Initialize(); // Register the limit. KResourceLimit::Register(kernel, resource_limit); diff --git a/src/core/hle/kernel/svc/svc_synchronization.cpp b/src/core/hle/kernel/svc/svc_synchronization.cpp index 366e8ed4a..8ebc1bd1c 100644 --- a/src/core/hle/kernel/svc/svc_synchronization.cpp +++ b/src/core/hle/kernel/svc/svc_synchronization.cpp @@ -4,6 +4,7 @@ #include "common/scope_exit.h" #include "common/scratch_buffer.h" #include "core/core.h" +#include "core/hle/kernel/k_hardware_timer.h" #include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_readable_event.h" #include "core/hle/kernel/svc.h" @@ -83,9 +84,20 @@ Result WaitSynchronization(Core::System& system, int32_t* out_index, u64 user_ha } }); + // Convert the timeout from nanoseconds to ticks. + s64 timeout; + if (timeout_ns > 0) { + u64 ticks = kernel.HardwareTimer().GetTick(); + ticks += timeout_ns; + ticks += 2; + + timeout = ticks; + } else { + timeout = timeout_ns; + } + // Wait on the objects. - Result res = - KSynchronizationObject::Wait(kernel, out_index, objs.data(), num_handles, timeout_ns); + Result res = KSynchronizationObject::Wait(kernel, out_index, objs.data(), num_handles, timeout); R_SUCCEED_IF(res == ResultSessionClosed); R_RETURN(res); diff --git a/src/core/hle/kernel/svc/svc_thread.cpp b/src/core/hle/kernel/svc/svc_thread.cpp index 92bcea72b..933b82e30 100644 --- a/src/core/hle/kernel/svc/svc_thread.cpp +++ b/src/core/hle/kernel/svc/svc_thread.cpp @@ -4,6 +4,7 @@ #include "common/scope_exit.h" #include "core/core.h" #include "core/core_timing.h" +#include "core/hle/kernel/k_hardware_timer.h" #include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_scoped_resource_reservation.h" #include "core/hle/kernel/k_thread.h" @@ -42,9 +43,9 @@ Result CreateThread(Core::System& system, Handle* out_handle, u64 entry_point, u R_UNLESS(process.CheckThreadPriority(priority), ResultInvalidPriority); // Reserve a new thread from the process resource limit (waiting up to 100ms). - KScopedResourceReservation thread_reservation( - std::addressof(process), LimitableResource::ThreadCountMax, 1, - system.CoreTiming().GetGlobalTimeNs().count() + 100000000); + KScopedResourceReservation thread_reservation(std::addressof(process), + LimitableResource::ThreadCountMax, 1, + kernel.HardwareTimer().GetTick() + 100000000); R_UNLESS(thread_reservation.Succeeded(), ResultLimitReached); // Create the thread. @@ -102,20 +103,31 @@ void ExitThread(Core::System& system) { } /// Sleep the current thread -void SleepThread(Core::System& system, s64 nanoseconds) { +void SleepThread(Core::System& system, s64 ns) { auto& kernel = system.Kernel(); - const auto yield_type = static_cast<Svc::YieldType>(nanoseconds); + const auto yield_type = static_cast<Svc::YieldType>(ns); - LOG_TRACE(Kernel_SVC, "called nanoseconds={}", nanoseconds); + LOG_TRACE(Kernel_SVC, "called nanoseconds={}", ns); // When the input tick is positive, sleep. - if (nanoseconds > 0) { + if (ns > 0) { // Convert the timeout from nanoseconds to ticks. // NOTE: Nintendo does not use this conversion logic in WaitSynchronization... + s64 timeout; + + const s64 offset_tick(ns); + if (offset_tick > 0) { + timeout = kernel.HardwareTimer().GetTick() + offset_tick + 2; + if (timeout <= 0) { + timeout = std::numeric_limits<s64>::max(); + } + } else { + timeout = std::numeric_limits<s64>::max(); + } // Sleep. // NOTE: Nintendo does not check the result of this sleep. - static_cast<void>(GetCurrentThread(kernel).Sleep(nanoseconds)); + static_cast<void>(GetCurrentThread(kernel).Sleep(timeout)); } else if (yield_type == Svc::YieldType::WithoutCoreMigration) { KScheduler::YieldWithoutCoreMigration(kernel); } else if (yield_type == Svc::YieldType::WithCoreMigration) { @@ -124,7 +136,6 @@ void SleepThread(Core::System& system, s64 nanoseconds) { KScheduler::YieldToAnyThread(kernel); } else { // Nintendo does nothing at all if an otherwise invalid value is passed. - ASSERT_MSG(false, "Unimplemented sleep yield type '{:016X}'!", nanoseconds); } } diff --git a/src/core/hle/service/sockets/sockets.h b/src/core/hle/service/sockets/sockets.h index 77426c46e..f86af01a4 100644 --- a/src/core/hle/service/sockets/sockets.h +++ b/src/core/hle/service/sockets/sockets.h @@ -18,7 +18,9 @@ enum class Errno : u32 { AGAIN = 11, INVAL = 22, MFILE = 24, + PIPE = 32, MSGSIZE = 90, + CONNABORTED = 103, CONNRESET = 104, NOTCONN = 107, TIMEDOUT = 110, diff --git a/src/core/hle/service/sockets/sockets_translate.cpp b/src/core/hle/service/sockets/sockets_translate.cpp index c1187209f..aed05250c 100644 --- a/src/core/hle/service/sockets/sockets_translate.cpp +++ b/src/core/hle/service/sockets/sockets_translate.cpp @@ -23,10 +23,14 @@ Errno Translate(Network::Errno value) { return Errno::INVAL; case Network::Errno::MFILE: return Errno::MFILE; + case Network::Errno::PIPE: + return Errno::PIPE; case Network::Errno::NOTCONN: return Errno::NOTCONN; case Network::Errno::TIMEDOUT: return Errno::TIMEDOUT; + case Network::Errno::CONNABORTED: + return Errno::CONNABORTED; case Network::Errno::CONNRESET: return Errno::CONNRESET; case Network::Errno::INPROGRESS: diff --git a/src/core/internal_network/network.cpp b/src/core/internal_network/network.cpp index bda9fa2e0..5d28300e6 100644 --- a/src/core/internal_network/network.cpp +++ b/src/core/internal_network/network.cpp @@ -39,6 +39,11 @@ namespace Network { namespace { +enum class CallType { + Send, + Other, +}; + #ifdef _WIN32 using socklen_t = int; @@ -96,7 +101,7 @@ bool EnableNonBlock(SOCKET fd, bool enable) { return ioctlsocket(fd, FIONBIO, &value) != SOCKET_ERROR; } -Errno TranslateNativeError(int e) { +Errno TranslateNativeError(int e, CallType call_type = CallType::Other) { switch (e) { case 0: return Errno::SUCCESS; @@ -112,6 +117,14 @@ Errno TranslateNativeError(int e) { return Errno::AGAIN; case WSAECONNREFUSED: return Errno::CONNREFUSED; + case WSAECONNABORTED: + if (call_type == CallType::Send) { + // Winsock yields WSAECONNABORTED from `send` in situations where Unix + // systems, and actual Switches, yield EPIPE. + return Errno::PIPE; + } else { + return Errno::CONNABORTED; + } case WSAECONNRESET: return Errno::CONNRESET; case WSAEHOSTUNREACH: @@ -198,7 +211,7 @@ bool EnableNonBlock(int fd, bool enable) { return fcntl(fd, F_SETFL, flags) == 0; } -Errno TranslateNativeError(int e) { +Errno TranslateNativeError(int e, CallType call_type = CallType::Other) { switch (e) { case 0: return Errno::SUCCESS; @@ -208,6 +221,10 @@ Errno TranslateNativeError(int e) { return Errno::INVAL; case EMFILE: return Errno::MFILE; + case EPIPE: + return Errno::PIPE; + case ECONNABORTED: + return Errno::CONNABORTED; case ENOTCONN: return Errno::NOTCONN; case EAGAIN: @@ -236,13 +253,13 @@ Errno TranslateNativeError(int e) { #endif -Errno GetAndLogLastError() { +Errno GetAndLogLastError(CallType call_type = CallType::Other) { #ifdef _WIN32 int e = WSAGetLastError(); #else int e = errno; #endif - const Errno err = TranslateNativeError(e); + const Errno err = TranslateNativeError(e, call_type); if (err == Errno::AGAIN || err == Errno::TIMEDOUT || err == Errno::INPROGRESS) { // These happen during normal operation, so only log them at debug level. LOG_DEBUG(Network, "Socket operation error: {}", Common::NativeErrorToString(e)); @@ -731,13 +748,17 @@ std::pair<s32, Errno> Socket::Send(std::span<const u8> message, int flags) { ASSERT(message.size() < static_cast<size_t>(std::numeric_limits<int>::max())); ASSERT(flags == 0); + int native_flags = 0; +#if YUZU_UNIX + native_flags |= MSG_NOSIGNAL; // do not send us SIGPIPE +#endif const auto result = send(fd, reinterpret_cast<const char*>(message.data()), - static_cast<int>(message.size()), 0); + static_cast<int>(message.size()), native_flags); if (result != SOCKET_ERROR) { return {static_cast<s32>(result), Errno::SUCCESS}; } - return {-1, GetAndLogLastError()}; + return {-1, GetAndLogLastError(CallType::Send)}; } std::pair<s32, Errno> Socket::SendTo(u32 flags, std::span<const u8> message, @@ -759,7 +780,7 @@ std::pair<s32, Errno> Socket::SendTo(u32 flags, std::span<const u8> message, return {static_cast<s32>(result), Errno::SUCCESS}; } - return {-1, GetAndLogLastError()}; + return {-1, GetAndLogLastError(CallType::Send)}; } Errno Socket::Close() { diff --git a/src/core/internal_network/network.h b/src/core/internal_network/network.h index badcb8369..c7e20ae34 100644 --- a/src/core/internal_network/network.h +++ b/src/core/internal_network/network.h @@ -33,10 +33,12 @@ enum class Errno { BADF, INVAL, MFILE, + PIPE, NOTCONN, AGAIN, CONNREFUSED, CONNRESET, + CONNABORTED, HOSTUNREACH, NETDOWN, NETUNREACH, diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index e61d9af80..c4d459077 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -50,6 +50,7 @@ set(SHADER_FILES vulkan_blit_depth_stencil.frag vulkan_color_clear.frag vulkan_color_clear.vert + vulkan_depthstencil_clear.frag vulkan_fidelityfx_fsr_easu_fp16.comp vulkan_fidelityfx_fsr_easu_fp32.comp vulkan_fidelityfx_fsr_rcas_fp16.comp diff --git a/src/video_core/host_shaders/vulkan_depthstencil_clear.frag b/src/video_core/host_shaders/vulkan_depthstencil_clear.frag new file mode 100644 index 000000000..1ac177c7e --- /dev/null +++ b/src/video_core/host_shaders/vulkan_depthstencil_clear.frag @@ -0,0 +1,12 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#version 460 core + +layout (push_constant) uniform PushConstants { + vec4 clear_depth; +}; + +void main() { + gl_FragDepth = clear_depth.x; +} diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index aadd6967c..1ba31be88 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -1335,7 +1335,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, } const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height); static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; - const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing; + const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing + : VideoCommon::ObtainBufferOperation::MarkAsWritten; const auto [buffer, offset] = buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op); @@ -1344,8 +1345,12 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, const std::span copy_span{©, 1}; if constexpr (IS_IMAGE_UPLOAD) { + texture_cache.PrepareImage(image_id, true, false); image->UploadMemory(buffer->Handle(), offset, copy_span); } else { + if (offset % BytesPerBlock(image->info.format)) { + return false; + } texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span, buffer_operand.address, buffer_size); } diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index f74ae972e..1032c9d12 100644 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -16,6 +16,7 @@ #include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h" #include "video_core/host_shaders/vulkan_color_clear_frag_spv.h" #include "video_core/host_shaders/vulkan_color_clear_vert_spv.h" +#include "video_core/host_shaders/vulkan_depthstencil_clear_frag_spv.h" #include "video_core/renderer_vulkan/blit_image.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -428,6 +429,7 @@ BlitImageHelper::BlitImageHelper(const Device& device_, Scheduler& scheduler_, blit_depth_stencil_frag(BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV)), clear_color_vert(BuildShader(device, VULKAN_COLOR_CLEAR_VERT_SPV)), clear_color_frag(BuildShader(device, VULKAN_COLOR_CLEAR_FRAG_SPV)), + clear_stencil_frag(BuildShader(device, VULKAN_DEPTHSTENCIL_CLEAR_FRAG_SPV)), convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)), convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)), convert_abgr8_to_d24s8_frag(BuildShader(device, CONVERT_ABGR8_TO_D24S8_FRAG_SPV)), @@ -593,6 +595,28 @@ void BlitImageHelper::ClearColor(const Framebuffer* dst_framebuffer, u8 color_ma scheduler.InvalidateState(); } +void BlitImageHelper::ClearDepthStencil(const Framebuffer* dst_framebuffer, bool depth_clear, + f32 clear_depth, u8 stencil_mask, u32 stencil_ref, + u32 stencil_compare_mask, const Region2D& dst_region) { + const BlitDepthStencilPipelineKey key{ + .renderpass = dst_framebuffer->RenderPass(), + .depth_clear = depth_clear, + .stencil_mask = stencil_mask, + .stencil_compare_mask = stencil_compare_mask, + .stencil_ref = stencil_ref, + }; + const VkPipeline pipeline = FindOrEmplaceClearStencilPipeline(key); + const VkPipelineLayout layout = *clear_color_pipeline_layout; + scheduler.RequestRenderpass(dst_framebuffer); + scheduler.Record([pipeline, layout, clear_depth, dst_region](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + BindBlitState(cmdbuf, dst_region); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_depth); + cmdbuf.Draw(3, 1, 0, 0); + }); + scheduler.InvalidateState(); +} + void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view) { const VkPipelineLayout layout = *one_texture_pipeline_layout; @@ -820,6 +844,61 @@ VkPipeline BlitImageHelper::FindOrEmplaceClearColorPipeline(const BlitImagePipel return *clear_color_pipelines.back(); } +VkPipeline BlitImageHelper::FindOrEmplaceClearStencilPipeline( + const BlitDepthStencilPipelineKey& key) { + const auto it = std::ranges::find(clear_stencil_keys, key); + if (it != clear_stencil_keys.end()) { + return *clear_stencil_pipelines[std::distance(clear_stencil_keys.begin(), it)]; + } + clear_stencil_keys.push_back(key); + const std::array stages = MakeStages(*clear_color_vert, *clear_stencil_frag); + const auto stencil = VkStencilOpState{ + .failOp = VK_STENCIL_OP_KEEP, + .passOp = VK_STENCIL_OP_REPLACE, + .depthFailOp = VK_STENCIL_OP_KEEP, + .compareOp = VK_COMPARE_OP_ALWAYS, + .compareMask = key.stencil_compare_mask, + .writeMask = key.stencil_mask, + .reference = key.stencil_ref, + }; + const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .depthTestEnable = VK_FALSE, + .depthWriteEnable = key.depth_clear, + .depthCompareOp = VK_COMPARE_OP_ALWAYS, + .depthBoundsTestEnable = VK_FALSE, + .stencilTestEnable = VK_TRUE, + .front = stencil, + .back = stencil, + .minDepthBounds = 0.0f, + .maxDepthBounds = 0.0f, + }; + clear_stencil_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({ + .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .stageCount = static_cast<u32>(stages.size()), + .pStages = stages.data(), + .pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, + .pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, + .pTessellationState = nullptr, + .pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO, + .pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO, + .pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, + .pDepthStencilState = &depth_stencil_ci, + .pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO, + .pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO, + .layout = *clear_color_pipeline_layout, + .renderPass = key.renderpass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0, + })); + return *clear_stencil_pipelines.back(); +} + void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth) { if (pipeline) { diff --git a/src/video_core/renderer_vulkan/blit_image.h b/src/video_core/renderer_vulkan/blit_image.h index 2976a7d91..dcfe217aa 100644 --- a/src/video_core/renderer_vulkan/blit_image.h +++ b/src/video_core/renderer_vulkan/blit_image.h @@ -27,6 +27,16 @@ struct BlitImagePipelineKey { Tegra::Engines::Fermi2D::Operation operation; }; +struct BlitDepthStencilPipelineKey { + constexpr auto operator<=>(const BlitDepthStencilPipelineKey&) const noexcept = default; + + VkRenderPass renderpass; + bool depth_clear; + u8 stencil_mask; + u32 stencil_compare_mask; + u32 stencil_ref; +}; + class BlitImageHelper { public: explicit BlitImageHelper(const Device& device, Scheduler& scheduler, @@ -64,6 +74,10 @@ public: void ClearColor(const Framebuffer* dst_framebuffer, u8 color_mask, const std::array<f32, 4>& clear_color, const Region2D& dst_region); + void ClearDepthStencil(const Framebuffer* dst_framebuffer, bool depth_clear, f32 clear_depth, + u8 stencil_mask, u32 stencil_ref, u32 stencil_compare_mask, + const Region2D& dst_region); + private: void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer, const ImageView& src_image_view); @@ -76,6 +90,8 @@ private: [[nodiscard]] VkPipeline FindOrEmplaceDepthStencilPipeline(const BlitImagePipelineKey& key); [[nodiscard]] VkPipeline FindOrEmplaceClearColorPipeline(const BlitImagePipelineKey& key); + [[nodiscard]] VkPipeline FindOrEmplaceClearStencilPipeline( + const BlitDepthStencilPipelineKey& key); void ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass, bool is_target_depth); @@ -108,6 +124,7 @@ private: vk::ShaderModule blit_depth_stencil_frag; vk::ShaderModule clear_color_vert; vk::ShaderModule clear_color_frag; + vk::ShaderModule clear_stencil_frag; vk::ShaderModule convert_depth_to_float_frag; vk::ShaderModule convert_float_to_depth_frag; vk::ShaderModule convert_abgr8_to_d24s8_frag; @@ -122,6 +139,8 @@ private: std::vector<vk::Pipeline> blit_depth_stencil_pipelines; std::vector<BlitImagePipelineKey> clear_color_keys; std::vector<vk::Pipeline> clear_color_pipelines; + std::vector<BlitDepthStencilPipelineKey> clear_stencil_keys; + std::vector<vk::Pipeline> clear_stencil_pipelines; vk::Pipeline convert_d32_to_r32_pipeline; vk::Pipeline convert_r32_to_d32_pipeline; vk::Pipeline convert_d16_to_r16_pipeline; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index a8540339d..35bf80ea3 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -126,7 +126,7 @@ struct FormatTuple { {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM {VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM {VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT - {VK_FORMAT_A2R10G10B10_UNORM_PACK32, Attachable | Storage}, // A2R10G10B10_UNORM + {VK_FORMAT_A2R10G10B10_UNORM_PACK32, Attachable}, // A2R10G10B10_UNORM {VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1B5G5R5_UNORM (flipped with swizzle) {VK_FORMAT_R5G5B5A1_UNORM_PACK16}, // A5B5G5R1_UNORM (specially swizzled) {VK_FORMAT_R8_UNORM, Attachable | Storage}, // R8_UNORM diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index aa59889bd..032f694bc 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -428,15 +428,27 @@ void RasterizerVulkan::Clear(u32 layer_count) { if (aspect_flags == 0) { return; } - scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, - clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { - VkClearAttachment attachment; - attachment.aspectMask = aspect_flags; - attachment.colorAttachment = 0; - attachment.clearValue.depthStencil.depth = clear_depth; - attachment.clearValue.depthStencil.stencil = clear_stencil; - cmdbuf.ClearAttachments(attachment, clear_rect); - }); + + if (use_stencil && regs.stencil_front_mask != 0xFF && regs.stencil_front_mask != 0) { + Region2D dst_region = { + Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y}, + Offset2D{.x = clear_rect.rect.offset.x + static_cast<s32>(clear_rect.rect.extent.width), + .y = clear_rect.rect.offset.y + + static_cast<s32>(clear_rect.rect.extent.height)}}; + blit_image.ClearDepthStencil(framebuffer, use_depth, regs.clear_depth, + static_cast<u8>(regs.stencil_front_mask), regs.clear_stencil, + regs.stencil_front_func_mask, dst_region); + } else { + scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil, + clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) { + VkClearAttachment attachment; + attachment.aspectMask = aspect_flags; + attachment.colorAttachment = 0; + attachment.clearValue.depthStencil.depth = clear_depth; + attachment.clearValue.depthStencil.stencil = clear_stencil; + cmdbuf.ClearAttachments(attachment, clear_rect); + }); + } } void RasterizerVulkan::DispatchCompute() { @@ -830,7 +842,8 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, } const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height); static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; - const auto post_op = VideoCommon::ObtainBufferOperation::DoNothing; + const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing + : VideoCommon::ObtainBufferOperation::MarkAsWritten; const auto [buffer, offset] = buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op); @@ -839,8 +852,12 @@ bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info, const std::span copy_span{©, 1}; if constexpr (IS_IMAGE_UPLOAD) { + texture_cache.PrepareImage(image_id, true, false); image->UploadMemory(buffer->Handle(), offset, copy_span); } else { + if (offset % BytesPerBlock(image->info.format)) { + return false; + } texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span, buffer_operand.address, buffer_size); } diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index e9ec91265..a40825c9f 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -243,6 +243,9 @@ public: /// Create channel state. void CreateChannel(Tegra::Control::ChannelState& channel) final override; + /// Prepare an image to be used + void PrepareImage(ImageId image_id, bool is_modification, bool invalidate); + std::recursive_mutex mutex; private: @@ -387,9 +390,6 @@ private: /// Synchronize image aliases, copying data if needed void SynchronizeAliases(ImageId image_id); - /// Prepare an image to be used - void PrepareImage(ImageId image_id, bool is_modification, bool invalidate); - /// Prepare an image view to be used void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index adde96aa5..617417040 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -71,6 +71,11 @@ constexpr std::array R8G8B8_SSCALED{ VK_FORMAT_UNDEFINED, }; +constexpr std::array VK_FORMAT_R32G32B32_SFLOAT{ + VK_FORMAT_R32G32B32A32_SFLOAT, + VK_FORMAT_UNDEFINED, +}; + } // namespace Alternatives enum class NvidiaArchitecture { @@ -103,6 +108,8 @@ constexpr const VkFormat* GetFormatAlternatives(VkFormat format) { return Alternatives::R16G16B16_SSCALED.data(); case VK_FORMAT_R8G8B8_SSCALED: return Alternatives::R8G8B8_SSCALED.data(); + case VK_FORMAT_R32G32B32_SFLOAT: + return Alternatives::VK_FORMAT_R32G32B32_SFLOAT.data(); default: return nullptr; } @@ -130,6 +137,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(vk::Physica VK_FORMAT_A2B10G10R10_UINT_PACK32, VK_FORMAT_A2B10G10R10_UNORM_PACK32, VK_FORMAT_A2B10G10R10_USCALED_PACK32, + VK_FORMAT_A2R10G10B10_UNORM_PACK32, VK_FORMAT_A8B8G8R8_SINT_PACK32, VK_FORMAT_A8B8G8R8_SNORM_PACK32, VK_FORMAT_A8B8G8R8_SRGB_PACK32, |