diff options
author | Fernando Sahmkow <fsahmkow27@gmail.com> | 2022-01-30 10:31:13 +0100 |
---|---|---|
committer | Fernando Sahmkow <fsahmkow27@gmail.com> | 2022-10-06 21:00:52 +0200 |
commit | 668e80a9f42fb4ce0e16f6381d05bcbd286b2da1 (patch) | |
tree | a1c668d6c3d00eade849b1d31dba4116095e4c12 /src/video_core | |
parent | Texture Cache: Fix GC and GPU Modified on Joins. (diff) | |
download | yuzu-668e80a9f42fb4ce0e16f6381d05bcbd286b2da1.tar yuzu-668e80a9f42fb4ce0e16f6381d05bcbd286b2da1.tar.gz yuzu-668e80a9f42fb4ce0e16f6381d05bcbd286b2da1.tar.bz2 yuzu-668e80a9f42fb4ce0e16f6381d05bcbd286b2da1.tar.lz yuzu-668e80a9f42fb4ce0e16f6381d05bcbd286b2da1.tar.xz yuzu-668e80a9f42fb4ce0e16f6381d05bcbd286b2da1.tar.zst yuzu-668e80a9f42fb4ce0e16f6381d05bcbd286b2da1.zip |
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/CMakeLists.txt | 40 | ||||
-rw-r--r-- | src/video_core/cdma_pusher.cpp | 25 | ||||
-rw-r--r-- | src/video_core/cdma_pusher.h | 15 | ||||
-rw-r--r-- | src/video_core/control/channel_state.cpp | 2 | ||||
-rw-r--r-- | src/video_core/control/channel_state.h | 2 | ||||
-rw-r--r-- | src/video_core/control/channel_state_cache.h | 4 | ||||
-rw-r--r-- | src/video_core/control/scheduler.cpp | 2 | ||||
-rw-r--r-- | src/video_core/control/scheduler.h | 2 | ||||
-rw-r--r-- | src/video_core/dma_pusher.h | 26 | ||||
-rw-r--r-- | src/video_core/engines/puller.cpp | 65 | ||||
-rw-r--r-- | src/video_core/engines/puller.h | 1 | ||||
-rw-r--r-- | src/video_core/fence_manager.h | 12 | ||||
-rw-r--r-- | src/video_core/gpu.cpp | 197 | ||||
-rw-r--r-- | src/video_core/gpu.h | 19 | ||||
-rw-r--r-- | src/video_core/gpu_thread.cpp | 6 | ||||
-rw-r--r-- | src/video_core/gpu_thread.h | 2 | ||||
-rw-r--r-- | src/video_core/host1x/codecs/codec.cpp (renamed from src/video_core/command_classes/codecs/codec.cpp) | 36 | ||||
-rw-r--r-- | src/video_core/host1x/codecs/codec.h (renamed from src/video_core/command_classes/codecs/codec.h) | 14 | ||||
-rw-r--r-- | src/video_core/host1x/codecs/h264.cpp (renamed from src/video_core/command_classes/codecs/h264.cpp) | 4 | ||||
-rw-r--r-- | src/video_core/host1x/codecs/h264.h (renamed from src/video_core/command_classes/codecs/h264.h) | 6 | ||||
-rw-r--r-- | src/video_core/host1x/codecs/vp8.cpp (renamed from src/video_core/command_classes/codecs/vp8.cpp) | 4 | ||||
-rw-r--r-- | src/video_core/host1x/codecs/vp8.h (renamed from src/video_core/command_classes/codecs/vp8.h) | 5 | ||||
-rw-r--r-- | src/video_core/host1x/codecs/vp9.cpp (renamed from src/video_core/command_classes/codecs/vp9.cpp) | 8 | ||||
-rw-r--r-- | src/video_core/host1x/codecs/vp9.h (renamed from src/video_core/command_classes/codecs/vp9.h) | 12 | ||||
-rw-r--r-- | src/video_core/host1x/codecs/vp9_types.h (renamed from src/video_core/command_classes/codecs/vp9_types.h) | 0 | ||||
-rw-r--r-- | src/video_core/host1x/control.cpp | 35 | ||||
-rw-r--r-- | src/video_core/host1x/control.h (renamed from src/video_core/command_classes/host1x.h) | 17 | ||||
-rw-r--r-- | src/video_core/host1x/host1x.h | 33 | ||||
-rw-r--r-- | src/video_core/host1x/nvdec.cpp (renamed from src/video_core/command_classes/nvdec.cpp) | 6 | ||||
-rw-r--r-- | src/video_core/host1x/nvdec.h (renamed from src/video_core/command_classes/nvdec.h) | 7 | ||||
-rw-r--r-- | src/video_core/host1x/nvdec_common.h (renamed from src/video_core/command_classes/nvdec_common.h) | 4 | ||||
-rw-r--r-- | src/video_core/host1x/sync_manager.cpp (renamed from src/video_core/command_classes/sync_manager.cpp) | 10 | ||||
-rw-r--r-- | src/video_core/host1x/sync_manager.h (renamed from src/video_core/command_classes/sync_manager.h) | 6 | ||||
-rw-r--r-- | src/video_core/host1x/syncpoint_manager.cpp | 93 | ||||
-rw-r--r-- | src/video_core/host1x/syncpoint_manager.h | 99 | ||||
-rw-r--r-- | src/video_core/host1x/vic.cpp (renamed from src/video_core/command_classes/vic.cpp) | 9 | ||||
-rw-r--r-- | src/video_core/host1x/vic.h (renamed from src/video_core/command_classes/vic.h) | 7 |
37 files changed, 595 insertions, 240 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 35faa70a0..723f9b67c 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -4,7 +4,7 @@ add_subdirectory(host_shaders) if(LIBVA_FOUND) - set_source_files_properties(command_classes/codecs/codec.cpp + set_source_files_properties(host1x/codecs/codec.cpp PROPERTIES COMPILE_DEFINITIONS LIBVA_FOUND=1) list(APPEND FFmpeg_LIBRARIES ${LIBVA_LIBRARIES}) endif() @@ -15,24 +15,6 @@ add_library(video_core STATIC buffer_cache/buffer_cache.h cdma_pusher.cpp cdma_pusher.h - command_classes/codecs/codec.cpp - command_classes/codecs/codec.h - command_classes/codecs/h264.cpp - command_classes/codecs/h264.h - command_classes/codecs/vp8.cpp - command_classes/codecs/vp8.h - command_classes/codecs/vp9.cpp - command_classes/codecs/vp9.h - command_classes/codecs/vp9_types.h - command_classes/host1x.cpp - command_classes/host1x.h - command_classes/nvdec.cpp - command_classes/nvdec.h - command_classes/nvdec_common.h - command_classes/sync_manager.cpp - command_classes/sync_manager.h - command_classes/vic.cpp - command_classes/vic.h compatible_formats.cpp compatible_formats.h control/channel_state.cpp @@ -63,6 +45,26 @@ add_library(video_core STATIC engines/puller.cpp engines/puller.h framebuffer_config.h + host1x/codecs/codec.cpp + host1x/codecs/codec.h + host1x/codecs/h264.cpp + host1x/codecs/h264.h + host1x/codecs/vp8.cpp + host1x/codecs/vp8.h + host1x/codecs/vp9.cpp + host1x/codecs/vp9.h + host1x/codecs/vp9_types.h + host1x/control.cpp + host1x/control.h + host1x/nvdec.cpp + host1x/nvdec.h + host1x/nvdec_common.h + host1x/sync_manager.cpp + host1x/sync_manager.h + host1x/syncpoint_manager.cpp + host1x/syncpoint_manager.h + host1x/vic.cpp + host1x/vic.h macro/macro.cpp macro/macro.h macro/macro_hle.cpp diff --git a/src/video_core/cdma_pusher.cpp b/src/video_core/cdma_pusher.cpp index 8e890a85e..148126347 100644 --- a/src/video_core/cdma_pusher.cpp +++ 
b/src/video_core/cdma_pusher.cpp @@ -2,20 +2,22 @@ // SPDX-License-Identifier: MIT #include <bit> -#include "command_classes/host1x.h" -#include "command_classes/nvdec.h" -#include "command_classes/vic.h" #include "video_core/cdma_pusher.h" -#include "video_core/command_classes/sync_manager.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/gpu.h" +#include "video_core/host1x/control.h" +#include "video_core/host1x/nvdec.h" +#include "video_core/host1x/nvdec_common.h" +#include "video_core/host1x/sync_manager.h" +#include "video_core/host1x/vic.h" +#include "video_core/memory_manager.h" namespace Tegra { CDmaPusher::CDmaPusher(GPU& gpu_) - : gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)), - vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)), - host1x_processor(std::make_unique<Host1x>(gpu)), - sync_manager(std::make_unique<SyncptIncrManager>(gpu)) {} + : gpu{gpu_}, nvdec_processor(std::make_shared<Host1x::Nvdec>(gpu)), + vic_processor(std::make_unique<Host1x::Vic>(gpu, nvdec_processor)), + host1x_processor(std::make_unique<Host1x::Control>(gpu)), + sync_manager(std::make_unique<Host1x::SyncptIncrManager>(gpu)) {} CDmaPusher::~CDmaPusher() = default; @@ -109,16 +111,17 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) { case ThiMethod::SetMethod1: LOG_DEBUG(Service_NVDRV, "VIC method 0x{:X}, Args=({})", static_cast<u32>(vic_thi_state.method_0), data); - vic_processor->ProcessMethod(static_cast<Vic::Method>(vic_thi_state.method_0), data); + vic_processor->ProcessMethod(static_cast<Host1x::Vic::Method>(vic_thi_state.method_0), + data); break; default: break; } break; - case ChClassId::Host1x: + case ChClassId::Control: // This device is mainly for syncpoint synchronization LOG_DEBUG(Service_NVDRV, "Host1X Class Method"); - host1x_processor->ProcessMethod(static_cast<Host1x::Method>(offset), data); + host1x_processor->ProcessMethod(static_cast<Host1x::Control::Method>(offset), data); break; default: 
UNIMPLEMENTED_MSG("Current class not implemented {:X}", static_cast<u32>(current_class)); diff --git a/src/video_core/cdma_pusher.h b/src/video_core/cdma_pusher.h index d6ffef95f..de17c2082 100644 --- a/src/video_core/cdma_pusher.h +++ b/src/video_core/cdma_pusher.h @@ -13,10 +13,13 @@ namespace Tegra { class GPU; -class Host1x; + +namespace Host1x { +class Control; class Nvdec; class SyncptIncrManager; class Vic; +} // namespace Host1x enum class ChSubmissionMode : u32 { SetClass = 0, @@ -30,7 +33,7 @@ enum class ChSubmissionMode : u32 { enum class ChClassId : u32 { NoClass = 0x0, - Host1x = 0x1, + Control = 0x1, VideoEncodeMpeg = 0x20, VideoEncodeNvEnc = 0x21, VideoStreamingVi = 0x30, @@ -102,10 +105,10 @@ private: void ThiStateWrite(ThiRegisters& state, u32 offset, u32 argument); GPU& gpu; - std::shared_ptr<Tegra::Nvdec> nvdec_processor; - std::unique_ptr<Tegra::Vic> vic_processor; - std::unique_ptr<Tegra::Host1x> host1x_processor; - std::unique_ptr<SyncptIncrManager> sync_manager; + std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor; + std::unique_ptr<Tegra::Host1x::Vic> vic_processor; + std::unique_ptr<Tegra::Host1x::Control> host1x_processor; + std::unique_ptr<Host1x::SyncptIncrManager> sync_manager; ChClassId current_class{}; ThiRegisters vic_thi_state{}; ThiRegisters nvdec_thi_state{}; diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp index 67803fe94..3613c4992 100644 --- a/src/video_core/control/channel_state.cpp +++ b/src/video_core/control/channel_state.cpp @@ -1,5 +1,5 @@ // Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version +// Licensed under GPLv3 or any later version // Refer to the license.txt file included. 
#include "common/assert.h" diff --git a/src/video_core/control/channel_state.h b/src/video_core/control/channel_state.h index 82808a6b8..08a7591e1 100644 --- a/src/video_core/control/channel_state.h +++ b/src/video_core/control/channel_state.h @@ -1,5 +1,5 @@ // Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version +// Licensed under GPLv3 or any later version // Refer to the license.txt file included. #pragma once diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h index 31d80e8b7..dbf833de7 100644 --- a/src/video_core/control/channel_state_cache.h +++ b/src/video_core/control/channel_state_cache.h @@ -1,3 +1,7 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv3 or any later version +// Refer to the license.txt file included. + #pragma once #include <deque> diff --git a/src/video_core/control/scheduler.cpp b/src/video_core/control/scheduler.cpp index e1abcb188..a9bb00aa7 100644 --- a/src/video_core/control/scheduler.cpp +++ b/src/video_core/control/scheduler.cpp @@ -1,5 +1,5 @@ // Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version +// Licensed under GPLv3 or any later version // Refer to the license.txt file included. #include <memory> diff --git a/src/video_core/control/scheduler.h b/src/video_core/control/scheduler.h index 802e9caff..c1a773946 100644 --- a/src/video_core/control/scheduler.h +++ b/src/video_core/control/scheduler.h @@ -1,5 +1,5 @@ // Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version +// Licensed under GPLv3 or any later version // Refer to the license.txt file included. 
#pragma once diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index fd7c936c4..938f0f11c 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -37,24 +37,32 @@ enum class SubmissionMode : u32 { // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. // So the values you see in docs might be multiplied by 4. +// Register documentation: +// https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/classes/host/cla26f.h +// +// Register Description (approx): +// https://github.com/NVIDIA/open-gpu-doc/blob/ab27fc22db5de0d02a4cabe08e555663b62db4d4/manuals/volta/gv100/dev_pbdma.ref.txt enum class BufferMethods : u32 { BindObject = 0x0, + Illegal = 0x1, Nop = 0x2, SemaphoreAddressHigh = 0x4, SemaphoreAddressLow = 0x5, - SemaphoreSequence = 0x6, - SemaphoreTrigger = 0x7, - NotifyIntr = 0x8, + SemaphoreSequencePayload = 0x6, + SemaphoreOperation = 0x7, + NonStallInterrupt = 0x8, WrcacheFlush = 0x9, - Unk28 = 0xA, - UnkCacheFlush = 0xB, + MemOpA = 0xA, + MemOpB = 0xB, + MemOpC = 0xC, + MemOpD = 0xD, RefCnt = 0x14, SemaphoreAcquire = 0x1A, SemaphoreRelease = 0x1B, - FenceValue = 0x1C, - FenceAction = 0x1D, - WaitForInterrupt = 0x1E, - Unk7c = 0x1F, + SyncpointPayload = 0x1C, + SyncpointOperation = 0x1D, + WaitForIdle = 0x1E, + CRCCheck = 0x1F, Yield = 0x20, NonPullerMethods = 0x40, }; diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index 3866c8746..8c17639e4 100644 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp @@ -68,11 +68,6 @@ void Puller::ProcessFenceActionMethod() { } } -void Puller::ProcessWaitForInterruptMethod() { - // TODO(bunnei) ImplementMe - LOG_WARNING(HW_GPU, "(STUBBED) called"); -} - void Puller::ProcessSemaphoreTriggerMethod() { const auto semaphoreOperationMask = 0xF; const auto op = @@ -91,29 
+86,33 @@ void Puller::ProcessSemaphoreTriggerMethod() { block.timestamp = gpu.GetTicks(); memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block)); } else { - const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; - if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) || - (op == GpuSemaphoreOperation::AcquireGequal && - static_cast<s32>(word - regs.semaphore_sequence) > 0) || - (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) { - // Nothing to do in this case - } else { + do { + const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; regs.acquire_source = true; regs.acquire_value = regs.semaphore_sequence; if (op == GpuSemaphoreOperation::AcquireEqual) { regs.acquire_active = true; regs.acquire_mode = false; + if (word != regs.acquire_value) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + continue; + } } else if (op == GpuSemaphoreOperation::AcquireGequal) { regs.acquire_active = true; regs.acquire_mode = true; + if (word < regs.acquire_value) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + continue; + } } else if (op == GpuSemaphoreOperation::AcquireMask) { - // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with - // semaphore_sequence, gives a non-0 result - LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented"); + if (word & regs.semaphore_sequence == 0) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + continue; + } } else { LOG_ERROR(HW_GPU, "Invalid semaphore operation"); } - } + } while (false); } } @@ -124,6 +123,7 @@ void Puller::ProcessSemaphoreRelease() { void Puller::ProcessSemaphoreAcquire() { const u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); const auto value = regs.semaphore_acquire; + std::this_thread::sleep_for(std::chrono::milliseconds(5)); if (word != value) { 
regs.acquire_active = true; regs.acquire_value = value; @@ -146,32 +146,39 @@ void Puller::CallPullerMethod(const MethodCall& method_call) { case BufferMethods::Nop: case BufferMethods::SemaphoreAddressHigh: case BufferMethods::SemaphoreAddressLow: - case BufferMethods::SemaphoreSequence: - case BufferMethods::UnkCacheFlush: + case BufferMethods::SemaphoreSequencePayload: case BufferMethods::WrcacheFlush: - case BufferMethods::FenceValue: + case BufferMethods::SyncpointPayload: break; case BufferMethods::RefCnt: rasterizer->SignalReference(); break; - case BufferMethods::FenceAction: + case BufferMethods::SyncpointOperation: ProcessFenceActionMethod(); break; - case BufferMethods::WaitForInterrupt: - ProcessWaitForInterruptMethod(); + case BufferMethods::WaitForIdle: + rasterizer->WaitForIdle(); break; - case BufferMethods::SemaphoreTrigger: { + case BufferMethods::SemaphoreOperation: { ProcessSemaphoreTriggerMethod(); break; } - case BufferMethods::NotifyIntr: { - // TODO(Kmather73): Research and implement this method. - LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented"); + case BufferMethods::NonStallInterrupt: { + LOG_ERROR(HW_GPU, "Special puller engine method NonStallInterrupt not implemented"); break; } - case BufferMethods::Unk28: { - // TODO(Kmather73): Research and implement this method. - LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented"); + case BufferMethods::MemOpA: { + LOG_ERROR(HW_GPU, "Memory Operation A"); + break; + } + case BufferMethods::MemOpB: { + // Implement this better. 
+ rasterizer->SyncGuestHost(); + break; + } + case BufferMethods::MemOpC: + case BufferMethods::MemOpD: { + LOG_ERROR(HW_GPU, "Memory Operation C,D"); break; } case BufferMethods::SemaphoreAcquire: { diff --git a/src/video_core/engines/puller.h b/src/video_core/engines/puller.h index d948ec790..b4619e9a8 100644 --- a/src/video_core/engines/puller.h +++ b/src/video_core/engines/puller.h @@ -141,7 +141,6 @@ private: void ProcessSemaphoreAcquire(); void ProcessSemaphoreRelease(); void ProcessSemaphoreTriggerMethod(); - void ProcessWaitForInterruptMethod(); [[nodiscard]] bool ExecuteMethodOnEngine(u32 method); /// Mapping of command subchannels to their bound engine ids diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index d658e038d..03a70e5e0 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h @@ -11,6 +11,8 @@ #include "common/common_types.h" #include "video_core/delayed_destruction_ring.h" #include "video_core/gpu.h" +#include "video_core/host1x/host1x.h" +#include "video_core/host1x/syncpoint_manager.h" #include "video_core/rasterizer_interface.h" namespace VideoCommon { @@ -72,6 +74,7 @@ public: } void SignalSyncPoint(u32 value) { + syncpoint_manager.IncrementGuest(value); TryReleasePendingFences(); const bool should_flush = ShouldFlush(); CommitAsyncFlushes(); @@ -96,7 +99,7 @@ public: auto payload = current_fence->GetPayload(); std::memcpy(address, &payload, sizeof(payload)); } else { - gpu.IncrementSyncPoint(current_fence->GetPayload()); + syncpoint_manager.IncrementHost(current_fence->GetPayload()); } PopFence(); } @@ -106,8 +109,8 @@ protected: explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_, TTextureCache& texture_cache_, TTBufferCache& buffer_cache_, TQueryCache& query_cache_) - : rasterizer{rasterizer_}, gpu{gpu_}, texture_cache{texture_cache_}, - buffer_cache{buffer_cache_}, query_cache{query_cache_} {} + : rasterizer{rasterizer_}, gpu{gpu_}, 
syncpoint_manager{gpu.Host1x().GetSyncpointManager()}, + texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {} virtual ~FenceManager() = default; @@ -125,6 +128,7 @@ protected: VideoCore::RasterizerInterface& rasterizer; Tegra::GPU& gpu; + Tegra::Host1x::SyncpointManager& syncpoint_manager; TTextureCache& texture_cache; TTBufferCache& buffer_cache; TQueryCache& query_cache; @@ -142,7 +146,7 @@ private: const auto payload = current_fence->GetPayload(); std::memcpy(address, &payload, sizeof(payload)); } else { - gpu.IncrementSyncPoint(current_fence->GetPayload()); + syncpoint_manager.IncrementHost(current_fence->GetPayload()); } PopFence(); } diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index eebd7f3ff..1097db08a 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -28,6 +28,8 @@ #include "video_core/engines/maxwell_dma.h" #include "video_core/gpu.h" #include "video_core/gpu_thread.h" +#include "video_core/host1x/host1x.h" +#include "video_core/host1x/syncpoint_manager.h" #include "video_core/memory_manager.h" #include "video_core/renderer_base.h" #include "video_core/shader_notify.h" @@ -38,7 +40,7 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); struct GPU::Impl { explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_) - : gpu{gpu_}, system{system_}, use_nvdec{use_nvdec_}, + : gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_}, shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_}, gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {} @@ -115,31 +117,35 @@ struct GPU::Impl { } /// Request a host GPU memory flush from the CPU. 
- [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size) { - std::unique_lock lck{flush_request_mutex}; - const u64 fence = ++last_flush_fence; - flush_requests.emplace_back(fence, addr, size); + template <typename Func> + [[nodiscard]] u64 RequestSyncOperation(Func&& action) { + std::unique_lock lck{sync_request_mutex}; + const u64 fence = ++last_sync_fence; + sync_requests.emplace_back(action); return fence; } /// Obtains current flush request fence id. - [[nodiscard]] u64 CurrentFlushRequestFence() const { - return current_flush_fence.load(std::memory_order_relaxed); + [[nodiscard]] u64 CurrentSyncRequestFence() const { + return current_sync_fence.load(std::memory_order_relaxed); + } + + void WaitForSyncOperation(const u64 fence) { + std::unique_lock lck{sync_request_mutex}; + sync_request_cv.wait(lck, [this, fence] { return CurrentSyncRequestFence() >= fence; }); } /// Tick pending requests within the GPU. void TickWork() { - std::unique_lock lck{flush_request_mutex}; - while (!flush_requests.empty()) { - auto& request = flush_requests.front(); - const u64 fence = request.fence; - const VAddr addr = request.addr; - const std::size_t size = request.size; - flush_requests.pop_front(); - flush_request_mutex.unlock(); - rasterizer->FlushRegion(addr, size); - current_flush_fence.store(fence); - flush_request_mutex.lock(); + std::unique_lock lck{sync_request_mutex}; + while (!sync_requests.empty()) { + auto request = std::move(sync_requests.front()); + sync_requests.pop_front(); + sync_request_mutex.unlock(); + request(); + current_sync_fence.fetch_add(1, std::memory_order_release); + sync_request_mutex.lock(); + sync_request_cv.notify_all(); } } @@ -207,78 +213,26 @@ struct GPU::Impl { /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. void WaitFence(u32 syncpoint_id, u32 value) { - // Synced GPU, is always in sync - if (!is_async) { - return; - } if (syncpoint_id == UINT32_MAX) { - // TODO: Research what this does. 
- LOG_ERROR(HW_GPU, "Waiting for syncpoint -1 not implemented"); return; } MICROPROFILE_SCOPE(GPU_wait); - std::unique_lock lock{sync_mutex}; - sync_cv.wait(lock, [=, this] { - if (shutting_down.load(std::memory_order_relaxed)) { - // We're shutting down, ensure no threads continue to wait for the next syncpoint - return true; - } - return syncpoints.at(syncpoint_id).load() >= value; - }); + host1x.GetSyncpointManager().WaitHost(syncpoint_id, value); } void IncrementSyncPoint(u32 syncpoint_id) { - auto& syncpoint = syncpoints.at(syncpoint_id); - syncpoint++; - std::scoped_lock lock{sync_mutex}; - sync_cv.notify_all(); - auto& interrupt = syncpt_interrupts.at(syncpoint_id); - if (!interrupt.empty()) { - u32 value = syncpoint.load(); - auto it = interrupt.begin(); - while (it != interrupt.end()) { - if (value >= *it) { - TriggerCpuInterrupt(syncpoint_id, *it); - it = interrupt.erase(it); - continue; - } - it++; - } - } + host1x.GetSyncpointManager().IncrementHost(syncpoint_id); } [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const { - return syncpoints.at(syncpoint_id).load(); + return host1x.GetSyncpointManager().GetHostSyncpointValue(syncpoint_id); } void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) { - std::scoped_lock lock{sync_mutex}; - u32 current_value = syncpoints.at(syncpoint_id).load(); - if ((static_cast<s32>(current_value) - static_cast<s32>(value)) >= 0) { + auto& syncpoint_manager = host1x.GetSyncpointManager(); + syncpoint_manager.RegisterHostAction(syncpoint_id, value, [this, syncpoint_id, value]() { TriggerCpuInterrupt(syncpoint_id, value); - return; - } - auto& interrupt = syncpt_interrupts.at(syncpoint_id); - bool contains = std::any_of(interrupt.begin(), interrupt.end(), - [value](u32 in_value) { return in_value == value; }); - if (contains) { - return; - } - interrupt.emplace_back(value); - } - - [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value) { - std::scoped_lock lock{sync_mutex}; - auto& interrupt = 
syncpt_interrupts.at(syncpoint_id); - const auto iter = - std::find_if(interrupt.begin(), interrupt.end(), - [value](u32 interrupt_value) { return value == interrupt_value; }); - - if (iter == interrupt.end()) { - return false; - } - interrupt.erase(iter); - return true; + }); } [[nodiscard]] u64 GetTicks() const { @@ -387,8 +341,48 @@ struct GPU::Impl { interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); } + void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, + Service::Nvidia::NvFence* fences, size_t num_fences) { + size_t current_request_counter{}; + { + std::unique_lock<std::mutex> lk(request_swap_mutex); + if (free_swap_counters.empty()) { + current_request_counter = request_swap_counters.size(); + request_swap_counters.emplace_back(num_fences); + } else { + current_request_counter = free_swap_counters.front(); + request_swap_counters[current_request_counter] = num_fences; + free_swap_counters.pop_front(); + } + } + const auto wait_fence = + RequestSyncOperation([this, current_request_counter, framebuffer, fences, num_fences] { + auto& syncpoint_manager = host1x.GetSyncpointManager(); + if (num_fences == 0) { + renderer->SwapBuffers(framebuffer); + } + const auto executer = [this, current_request_counter, + framebuffer_copy = *framebuffer]() { + { + std::unique_lock<std::mutex> lk(request_swap_mutex); + if (--request_swap_counters[current_request_counter] != 0) { + return; + } + free_swap_counters.push_back(current_request_counter); + } + renderer->SwapBuffers(&framebuffer_copy); + }; + for (size_t i = 0; i < num_fences; i++) { + syncpoint_manager.RegisterGuestAction(fences[i].id, fences[i].value, executer); + } + }); + gpu_thread.TickGPU(); + WaitForSyncOperation(wait_fence); + } + GPU& gpu; Core::System& system; + Host1x::Host1x& host1x; std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers; std::unique_ptr<VideoCore::RendererBase> renderer; @@ -411,18 +405,11 @@ struct GPU::Impl { std::condition_variable sync_cv; - struct 
FlushRequest { - explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_) - : fence{fence_}, addr{addr_}, size{size_} {} - u64 fence; - VAddr addr; - std::size_t size; - }; - - std::list<FlushRequest> flush_requests; - std::atomic<u64> current_flush_fence{}; - u64 last_flush_fence{}; - std::mutex flush_request_mutex; + std::list<std::function<void(void)>> sync_requests; + std::atomic<u64> current_sync_fence{}; + u64 last_sync_fence{}; + std::mutex sync_request_mutex; + std::condition_variable sync_request_cv; const bool is_async; @@ -433,6 +420,10 @@ struct GPU::Impl { std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels; Tegra::Control::ChannelState* current_channel; s32 bound_channel{-1}; + + std::deque<size_t> free_swap_counters; + std::deque<size_t> request_swap_counters; + std::mutex request_swap_mutex; }; GPU::GPU(Core::System& system, bool is_async, bool use_nvdec) @@ -477,17 +468,32 @@ void GPU::OnCommandListEnd() { } u64 GPU::RequestFlush(VAddr addr, std::size_t size) { - return impl->RequestFlush(addr, size); + return impl->RequestSyncOperation( + [this, addr, size]() { impl->rasterizer->FlushRegion(addr, size); }); +} + +u64 GPU::CurrentSyncRequestFence() const { + return impl->CurrentSyncRequestFence(); } -u64 GPU::CurrentFlushRequestFence() const { - return impl->CurrentFlushRequestFence(); +void GPU::WaitForSyncOperation(u64 fence) { + return impl->WaitForSyncOperation(fence); } void GPU::TickWork() { impl->TickWork(); } +/// Gets a mutable reference to the Host1x interface +Host1x::Host1x& GPU::Host1x() { + return impl->host1x; +} + +/// Gets an immutable reference to the Host1x interface. 
+const Host1x::Host1x& GPU::Host1x() const { + return impl->host1x; +} + Engines::Maxwell3D& GPU::Maxwell3D() { return impl->Maxwell3D(); } @@ -536,6 +542,11 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const { return impl->ShaderNotify(); } +void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, + Service::Nvidia::NvFence* fences, size_t num_fences) { + impl->RequestSwapBuffers(framebuffer, fences, num_fences); +} + void GPU::WaitFence(u32 syncpoint_id, u32 value) { impl->WaitFence(syncpoint_id, value); } @@ -552,10 +563,6 @@ void GPU::RegisterSyncptInterrupt(u32 syncpoint_id, u32 value) { impl->RegisterSyncptInterrupt(syncpoint_id, value); } -bool GPU::CancelSyncptInterrupt(u32 syncpoint_id, u32 value) { - return impl->CancelSyncptInterrupt(syncpoint_id, value); -} - u64 GPU::GetTicks() const { return impl->GetTicks(); } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 7e84b0d2f..c1a538257 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -93,6 +93,10 @@ namespace Control { struct ChannelState; } +namespace Host1x { +class Host1x; +} // namespace Host1x + class MemoryManager; class GPU final { @@ -124,11 +128,19 @@ public: [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); /// Obtains current flush request fence id. - [[nodiscard]] u64 CurrentFlushRequestFence() const; + [[nodiscard]] u64 CurrentSyncRequestFence() const; + + void WaitForSyncOperation(u64 fence); /// Tick pending requests within the GPU. void TickWork(); + /// Gets a mutable reference to the Host1x interface + [[nodiscard]] Host1x::Host1x& Host1x(); + + /// Gets an immutable reference to the Host1x interface. + [[nodiscard]] const Host1x::Host1x& Host1x() const; + /// Returns a reference to the Maxwell3D GPU engine. 
[[nodiscard]] Engines::Maxwell3D& Maxwell3D(); @@ -174,8 +186,6 @@ public: void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value); - bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value); - [[nodiscard]] u64 GetTicks() const; [[nodiscard]] bool IsAsync() const; @@ -184,6 +194,9 @@ public: void RendererFrameEndNotify(); + void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, + Service::Nvidia::NvFence* fences, size_t num_fences); + /// Performs any additional setup necessary in order to begin GPU emulation. /// This can be used to launch any necessary threads and register any necessary /// core timing events. diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 9844cde43..2c03545bf 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -93,8 +93,12 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) { } auto& gpu = system.GPU(); u64 fence = gpu.RequestFlush(addr, size); + TickGPU(); + gpu.WaitForSyncOperation(fence); +} + +void ThreadManager::TickGPU() { PushCommand(GPUTickCommand(), true); - ASSERT(fence <= gpu.CurrentFlushRequestFence()); } void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index c5078a2b3..64628d3e3 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -135,6 +135,8 @@ public: void OnCommandListEnd(); + void TickGPU(); + private: /// Pushes a command to be executed by the GPU thread u64 PushCommand(CommandData&& command_data, bool block = false); diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/host1x/codecs/codec.cpp index a5eb97b7f..70c47ae03 100644 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/host1x/codecs/codec.cpp @@ -6,11 +6,11 @@ #include <vector> #include "common/assert.h" #include "common/settings.h" -#include "video_core/command_classes/codecs/codec.h" -#include 
"video_core/command_classes/codecs/h264.h" -#include "video_core/command_classes/codecs/vp8.h" -#include "video_core/command_classes/codecs/vp9.h" #include "video_core/gpu.h" +#include "video_core/host1x/codecs/codec.h" +#include "video_core/host1x/codecs/h264.h" +#include "video_core/host1x/codecs/vp8.h" +#include "video_core/host1x/codecs/vp9.h" #include "video_core/memory_manager.h" extern "C" { @@ -73,7 +73,7 @@ void AVFrameDeleter(AVFrame* ptr) { av_frame_free(&ptr); } -Codec::Codec(GPU& gpu_, const NvdecCommon::NvdecRegisters& regs) +Codec::Codec(GPU& gpu_, const Host1x::NvdecCommon::NvdecRegisters& regs) : gpu(gpu_), state{regs}, h264_decoder(std::make_unique<Decoder::H264>(gpu)), vp8_decoder(std::make_unique<Decoder::VP8>(gpu)), vp9_decoder(std::make_unique<Decoder::VP9>(gpu)) {} @@ -168,11 +168,11 @@ void Codec::InitializeGpuDecoder() { void Codec::Initialize() { const AVCodecID codec = [&] { switch (current_codec) { - case NvdecCommon::VideoCodec::H264: + case Host1x::NvdecCommon::VideoCodec::H264: return AV_CODEC_ID_H264; - case NvdecCommon::VideoCodec::VP8: + case Host1x::NvdecCommon::VideoCodec::VP8: return AV_CODEC_ID_VP8; - case NvdecCommon::VideoCodec::VP9: + case Host1x::NvdecCommon::VideoCodec::VP9: return AV_CODEC_ID_VP9; default: UNIMPLEMENTED_MSG("Unknown codec {}", current_codec); @@ -197,7 +197,7 @@ void Codec::Initialize() { initialized = true; } -void Codec::SetTargetCodec(NvdecCommon::VideoCodec codec) { +void Codec::SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec) { if (current_codec != codec) { current_codec = codec; LOG_INFO(Service_NVDRV, "NVDEC video codec initialized to {}", GetCurrentCodecName()); @@ -215,11 +215,11 @@ void Codec::Decode() { bool vp9_hidden_frame = false; const auto& frame_data = [&]() { switch (current_codec) { - case Tegra::NvdecCommon::VideoCodec::H264: + case Tegra::Host1x::NvdecCommon::VideoCodec::H264: return h264_decoder->ComposeFrame(state, is_first_frame); - case Tegra::NvdecCommon::VideoCodec::VP8: + 
case Tegra::Host1x::NvdecCommon::VideoCodec::VP8: return vp8_decoder->ComposeFrame(state); - case Tegra::NvdecCommon::VideoCodec::VP9: + case Tegra::Host1x::NvdecCommon::VideoCodec::VP9: vp9_decoder->ComposeFrame(state); vp9_hidden_frame = vp9_decoder->WasFrameHidden(); return vp9_decoder->GetFrameBytes(); @@ -287,21 +287,21 @@ AVFramePtr Codec::GetCurrentFrame() { return frame; } -NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { +Host1x::NvdecCommon::VideoCodec Codec::GetCurrentCodec() const { return current_codec; } std::string_view Codec::GetCurrentCodecName() const { switch (current_codec) { - case NvdecCommon::VideoCodec::None: + case Host1x::NvdecCommon::VideoCodec::None: return "None"; - case NvdecCommon::VideoCodec::H264: + case Host1x::NvdecCommon::VideoCodec::H264: return "H264"; - case NvdecCommon::VideoCodec::VP8: + case Host1x::NvdecCommon::VideoCodec::VP8: return "VP8"; - case NvdecCommon::VideoCodec::H265: + case Host1x::NvdecCommon::VideoCodec::H265: return "H265"; - case NvdecCommon::VideoCodec::VP9: + case Host1x::NvdecCommon::VideoCodec::VP9: return "VP9"; default: return "Unknown"; diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/host1x/codecs/codec.h index 0c2405465..117cb3ccd 100644 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/host1x/codecs/codec.h @@ -6,8 +6,8 @@ #include <memory> #include <string_view> #include <queue> - -#include "video_core/command_classes/nvdec_common.h" +#include "common/common_types.h" +#include "video_core/host1x/nvdec_common.h" extern "C" { #if defined(__GNUC__) || defined(__clang__) @@ -34,14 +34,14 @@ class VP9; class Codec { public: - explicit Codec(GPU& gpu, const NvdecCommon::NvdecRegisters& regs); + explicit Codec(GPU& gpu, const Host1x::NvdecCommon::NvdecRegisters& regs); ~Codec(); /// Initialize the codec for the current target codec (returns nothing) void Initialize(); /// Sets NVDEC video stream codec - void SetTargetCodec(NvdecCommon::VideoCodec codec); + void 
SetTargetCodec(Host1x::NvdecCommon::VideoCodec codec); /// Call decoders to construct headers, decode AVFrame with ffmpeg void Decode(); @@ -50,7 +50,7 @@ public: [[nodiscard]] AVFramePtr GetCurrentFrame(); /// Returns the value of current_codec - [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; + [[nodiscard]] Host1x::NvdecCommon::VideoCodec GetCurrentCodec() const; /// Return name of the current codec [[nodiscard]] std::string_view GetCurrentCodecName() const; @@ -63,14 +63,14 @@ private: bool CreateGpuAvDevice(); bool initialized{}; - NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None}; + Host1x::NvdecCommon::VideoCodec current_codec{Host1x::NvdecCommon::VideoCodec::None}; const AVCodec* av_codec{nullptr}; AVCodecContext* av_codec_ctx{nullptr}; AVBufferRef* av_gpu_decoder{nullptr}; GPU& gpu; - const NvdecCommon::NvdecRegisters& state; + const Host1x::NvdecCommon::NvdecRegisters& state; std::unique_ptr<Decoder::H264> h264_decoder; std::unique_ptr<Decoder::VP8> vp8_decoder; std::unique_ptr<Decoder::VP9> vp9_decoder; diff --git a/src/video_core/command_classes/codecs/h264.cpp b/src/video_core/host1x/codecs/h264.cpp index e2acd54d4..95534bc85 100644 --- a/src/video_core/command_classes/codecs/h264.cpp +++ b/src/video_core/host1x/codecs/h264.cpp @@ -5,8 +5,8 @@ #include <bit> #include "common/settings.h" -#include "video_core/command_classes/codecs/h264.h" #include "video_core/gpu.h" +#include "video_core/host1x/codecs/h264.h" #include "video_core/memory_manager.h" namespace Tegra::Decoder { @@ -28,7 +28,7 @@ H264::H264(GPU& gpu_) : gpu(gpu_) {} H264::~H264() = default; -const std::vector<u8>& H264::ComposeFrame(const NvdecCommon::NvdecRegisters& state, +const std::vector<u8>& H264::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state, bool is_first_frame) { H264DecoderContext context; gpu.MemoryManager().ReadBlock(state.picture_info_offset, &context, sizeof(H264DecoderContext)); diff --git 
a/src/video_core/command_classes/codecs/h264.h b/src/video_core/host1x/codecs/h264.h index 261574364..a98730474 100644 --- a/src/video_core/command_classes/codecs/h264.h +++ b/src/video_core/host1x/codecs/h264.h @@ -8,7 +8,7 @@ #include "common/bit_field.h" #include "common/common_funcs.h" #include "common/common_types.h" -#include "video_core/command_classes/nvdec_common.h" +#include "video_core/host1x/nvdec_common.h" namespace Tegra { class GPU; @@ -59,8 +59,8 @@ public: ~H264(); /// Compose the H264 frame for FFmpeg decoding - [[nodiscard]] const std::vector<u8>& ComposeFrame(const NvdecCommon::NvdecRegisters& state, - bool is_first_frame = false); + [[nodiscard]] const std::vector<u8>& ComposeFrame( + const Host1x::NvdecCommon::NvdecRegisters& state, bool is_first_frame = false); private: std::vector<u8> frame; diff --git a/src/video_core/command_classes/codecs/vp8.cpp b/src/video_core/host1x/codecs/vp8.cpp index c83b9bbc2..aac026e17 100644 --- a/src/video_core/command_classes/codecs/vp8.cpp +++ b/src/video_core/host1x/codecs/vp8.cpp @@ -3,8 +3,8 @@ #include <vector> -#include "video_core/command_classes/codecs/vp8.h" #include "video_core/gpu.h" +#include "video_core/host1x/codecs/vp8.h" #include "video_core/memory_manager.h" namespace Tegra::Decoder { @@ -12,7 +12,7 @@ VP8::VP8(GPU& gpu_) : gpu(gpu_) {} VP8::~VP8() = default; -const std::vector<u8>& VP8::ComposeFrame(const NvdecCommon::NvdecRegisters& state) { +const std::vector<u8>& VP8::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) { VP8PictureInfo info; gpu.MemoryManager().ReadBlock(state.picture_info_offset, &info, sizeof(VP8PictureInfo)); diff --git a/src/video_core/command_classes/codecs/vp8.h b/src/video_core/host1x/codecs/vp8.h index 3357667b0..a1dfa5f03 100644 --- a/src/video_core/command_classes/codecs/vp8.h +++ b/src/video_core/host1x/codecs/vp8.h @@ -8,7 +8,7 @@ #include "common/common_funcs.h" #include "common/common_types.h" -#include "video_core/command_classes/nvdec_common.h" 
+#include "video_core/host1x/nvdec_common.h" namespace Tegra { class GPU; @@ -20,7 +20,8 @@ public: ~VP8(); /// Compose the VP8 frame for FFmpeg decoding - [[nodiscard]] const std::vector<u8>& ComposeFrame(const NvdecCommon::NvdecRegisters& state); + [[nodiscard]] const std::vector<u8>& ComposeFrame( + const Host1x::NvdecCommon::NvdecRegisters& state); private: std::vector<u8> frame; diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/host1x/codecs/vp9.cpp index c01431441..bc50c6ba4 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/host1x/codecs/vp9.cpp @@ -4,8 +4,8 @@ #include <algorithm> // for std::copy #include <numeric> #include "common/assert.h" -#include "video_core/command_classes/codecs/vp9.h" #include "video_core/gpu.h" +#include "video_core/host1x/codecs/vp9.h" #include "video_core/memory_manager.h" namespace Tegra::Decoder { @@ -355,7 +355,7 @@ void VP9::WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_ } } -Vp9PictureInfo VP9::GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state) { +Vp9PictureInfo VP9::GetVp9PictureInfo(const Host1x::NvdecCommon::NvdecRegisters& state) { PictureInfo picture_info; gpu.MemoryManager().ReadBlock(state.picture_info_offset, &picture_info, sizeof(PictureInfo)); Vp9PictureInfo vp9_info = picture_info.Convert(); @@ -376,7 +376,7 @@ void VP9::InsertEntropy(u64 offset, Vp9EntropyProbs& dst) { entropy.Convert(dst); } -Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) { +Vp9FrameContainer VP9::GetCurrentFrame(const Host1x::NvdecCommon::NvdecRegisters& state) { Vp9FrameContainer current_frame{}; { gpu.SyncGuestHost(); @@ -769,7 +769,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() { return uncomp_writer; } -void VP9::ComposeFrame(const NvdecCommon::NvdecRegisters& state) { +void VP9::ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state) { std::vector<u8> bitstream; { Vp9FrameContainer curr_frame = 
GetCurrentFrame(state); diff --git a/src/video_core/command_classes/codecs/vp9.h b/src/video_core/host1x/codecs/vp9.h index ecc40e8b1..a425c0fa4 100644 --- a/src/video_core/command_classes/codecs/vp9.h +++ b/src/video_core/host1x/codecs/vp9.h @@ -8,8 +8,8 @@ #include "common/common_types.h" #include "common/stream.h" -#include "video_core/command_classes/codecs/vp9_types.h" -#include "video_core/command_classes/nvdec_common.h" +#include "video_core/host1x/codecs/vp9_types.h" +#include "video_core/host1x/nvdec_common.h" namespace Tegra { class GPU; @@ -117,7 +117,7 @@ public: /// Composes the VP9 frame from the GPU state information. /// Based on the official VP9 spec documentation - void ComposeFrame(const NvdecCommon::NvdecRegisters& state); + void ComposeFrame(const Host1x::NvdecCommon::NvdecRegisters& state); /// Returns true if the most recent frame was a hidden frame. [[nodiscard]] bool WasFrameHidden() const { @@ -162,13 +162,15 @@ private: void WriteMvProbabilityUpdate(VpxRangeEncoder& writer, u8 new_prob, u8 old_prob); /// Returns VP9 information from NVDEC provided offset and size - [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo(const NvdecCommon::NvdecRegisters& state); + [[nodiscard]] Vp9PictureInfo GetVp9PictureInfo( + const Host1x::NvdecCommon::NvdecRegisters& state); /// Read and convert NVDEC provided entropy probs to Vp9EntropyProbs struct void InsertEntropy(u64 offset, Vp9EntropyProbs& dst); /// Returns frame to be decoded after buffering - [[nodiscard]] Vp9FrameContainer GetCurrentFrame(const NvdecCommon::NvdecRegisters& state); + [[nodiscard]] Vp9FrameContainer GetCurrentFrame( + const Host1x::NvdecCommon::NvdecRegisters& state); /// Use NVDEC provided information to compose the headers for the current frame [[nodiscard]] std::vector<u8> ComposeCompressedHeader(); diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/host1x/codecs/vp9_types.h index bb3d8df6e..bb3d8df6e 100644 --- 
a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/host1x/codecs/vp9_types.h diff --git a/src/video_core/host1x/control.cpp b/src/video_core/host1x/control.cpp new file mode 100644 index 000000000..b72b01aa3 --- /dev/null +++ b/src/video_core/host1x/control.cpp @@ -0,0 +1,35 @@ +// Copyright 2022 yuzu Emulator Project +// Licensed under GPLv3 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "video_core/gpu.h" +#include "video_core/host1x/control.h" +#include "video_core/host1x/host1x.h" + +namespace Tegra::Host1x { + +Control::Control(GPU& gpu_) : gpu(gpu_) {} + +Control::~Control() = default; + +void Control::ProcessMethod(Method method, u32 argument) { + switch (method) { + case Method::LoadSyncptPayload32: + syncpoint_value = argument; + break; + case Method::WaitSyncpt: + case Method::WaitSyncpt32: + Execute(argument); + break; + default: + UNIMPLEMENTED_MSG("Control method 0x{:X}", static_cast<u32>(method)); + break; + } +} + +void Control::Execute(u32 data) { + gpu.Host1x().GetSyncpointManager().WaitHost(data, syncpoint_value); +} + +} // namespace Tegra::Host1x diff --git a/src/video_core/command_classes/host1x.h b/src/video_core/host1x/control.h index bb48a4381..04dac7d51 100644 --- a/src/video_core/command_classes/host1x.h +++ b/src/video_core/host1x/control.h @@ -1,5 +1,7 @@ -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later +// SPDX-FileCopyrightText: 2021 yuzu emulator team and Skyline Team and Contributors +// (https://github.com/skyline-emu/) +// SPDX-License-Identifier: GPL-3.0-or-later Licensed under GPLv3 +// or any later version Refer to the license.txt file included. 
#pragma once @@ -7,9 +9,12 @@ namespace Tegra { class GPU; + +namespace Host1x { + class Nvdec; -class Host1x { +class Control { public: enum class Method : u32 { WaitSyncpt = 0x8, @@ -17,8 +22,8 @@ public: WaitSyncpt32 = 0x50, }; - explicit Host1x(GPU& gpu); - ~Host1x(); + explicit Control(GPU& gpu); + ~Control(); /// Writes the method into the state, Invoke Execute() if encountered void ProcessMethod(Method method, u32 argument); @@ -31,4 +36,6 @@ private: GPU& gpu; }; +} // namespace Host1x + } // namespace Tegra diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h new file mode 100644 index 000000000..2971be286 --- /dev/null +++ b/src/video_core/host1x/host1x.h @@ -0,0 +1,33 @@ +// Copyright 2022 yuzu Emulator Project +// Licensed under GPLv3 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +#include "video_core/host1x/syncpoint_manager.h" + +namespace Tegra { + +namespace Host1x { + +class Host1x { +public: + Host1x() : syncpoint_manager{} {} + + SyncpointManager& GetSyncpointManager() { + return syncpoint_manager; + } + + const SyncpointManager& GetSyncpointManager() const { + return syncpoint_manager; + } + +private: + SyncpointManager syncpoint_manager; +}; + +} // namespace Host1x + +} // namespace Tegra diff --git a/src/video_core/command_classes/nvdec.cpp b/src/video_core/host1x/nvdec.cpp index 4fbbe3da6..5f6decd0d 100644 --- a/src/video_core/command_classes/nvdec.cpp +++ b/src/video_core/host1x/nvdec.cpp @@ -2,10 +2,10 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/assert.h" -#include "video_core/command_classes/nvdec.h" #include "video_core/gpu.h" +#include "video_core/host1x/nvdec.h" -namespace Tegra { +namespace Tegra::Host1x { #define NVDEC_REG_INDEX(field_name) \ (offsetof(NvdecCommon::NvdecRegisters, field_name) / sizeof(u64)) @@ -44,4 +44,4 @@ void Nvdec::Execute() { } } -} // namespace Tegra +} // namespace Tegra::Host1x diff --git 
a/src/video_core/command_classes/nvdec.h b/src/video_core/host1x/nvdec.h index 488531fc6..41ba1f7a0 100644 --- a/src/video_core/command_classes/nvdec.h +++ b/src/video_core/host1x/nvdec.h @@ -6,11 +6,13 @@ #include <memory> #include <vector> #include "common/common_types.h" -#include "video_core/command_classes/codecs/codec.h" +#include "video_core/host1x/codecs/codec.h" namespace Tegra { class GPU; +namespace Host1x { + class Nvdec { public: explicit Nvdec(GPU& gpu); @@ -30,4 +32,7 @@ private: NvdecCommon::NvdecRegisters state; std::unique_ptr<Codec> codec; }; + +} // namespace Host1x + } // namespace Tegra diff --git a/src/video_core/command_classes/nvdec_common.h b/src/video_core/host1x/nvdec_common.h index 521e5b52b..49d67ebbe 100644 --- a/src/video_core/command_classes/nvdec_common.h +++ b/src/video_core/host1x/nvdec_common.h @@ -7,7 +7,7 @@ #include "common/common_funcs.h" #include "common/common_types.h" -namespace Tegra::NvdecCommon { +namespace Tegra::Host1x::NvdecCommon { enum class VideoCodec : u64 { None = 0x0, @@ -94,4 +94,4 @@ ASSERT_REG_POSITION(vp9_curr_frame_mvs_offset, 0x176); #undef ASSERT_REG_POSITION -} // namespace Tegra::NvdecCommon +} // namespace Tegra::Host1x::NvdecCommon diff --git a/src/video_core/command_classes/sync_manager.cpp b/src/video_core/host1x/sync_manager.cpp index 67e58046f..8694f77e2 100644 --- a/src/video_core/command_classes/sync_manager.cpp +++ b/src/video_core/host1x/sync_manager.cpp @@ -4,8 +4,12 @@ #include <algorithm> #include "sync_manager.h" #include "video_core/gpu.h" +#include "video_core/host1x/host1x.h" +#include "video_core/host1x/syncpoint_manager.h" namespace Tegra { +namespace Host1x { + SyncptIncrManager::SyncptIncrManager(GPU& gpu_) : gpu(gpu_) {} SyncptIncrManager::~SyncptIncrManager() = default; @@ -36,8 +40,12 @@ void SyncptIncrManager::IncrementAllDone() { if (!increments[done_count].complete) { break; } - gpu.IncrementSyncPoint(increments[done_count].syncpt_id); + auto& syncpoint_manager = 
gpu.Host1x().GetSyncpointManager(); + syncpoint_manager.IncrementGuest(increments[done_count].syncpt_id); + syncpoint_manager.IncrementHost(increments[done_count].syncpt_id); } increments.erase(increments.begin(), increments.begin() + done_count); } + +} // namespace Host1x } // namespace Tegra diff --git a/src/video_core/command_classes/sync_manager.h b/src/video_core/host1x/sync_manager.h index 6dfaae080..aba72d5c5 100644 --- a/src/video_core/command_classes/sync_manager.h +++ b/src/video_core/host1x/sync_manager.h @@ -8,7 +8,11 @@ #include "common/common_types.h" namespace Tegra { + class GPU; + +namespace Host1x { + struct SyncptIncr { u32 id; u32 class_id; @@ -44,4 +48,6 @@ private: GPU& gpu; }; +} // namespace Host1x + } // namespace Tegra diff --git a/src/video_core/host1x/syncpoint_manager.cpp b/src/video_core/host1x/syncpoint_manager.cpp new file mode 100644 index 000000000..c606b8bd0 --- /dev/null +++ b/src/video_core/host1x/syncpoint_manager.cpp @@ -0,0 +1,93 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv3 or any later version +// Refer to the license.txt file included. 
+ +#include "video_core/host1x/syncpoint_manager.h" + +namespace Tegra { + +namespace Host1x { + +SyncpointManager::ActionHandle SyncpointManager::RegisterAction( + std::atomic<u32>& syncpoint, std::list<RegisteredAction>& action_storage, u32 expected_value, + std::function<void(void)>& action) { + if (syncpoint.load(std::memory_order_acquire) >= expected_value) { + action(); + return {}; + } + + std::unique_lock<std::mutex> lk(guard); + if (syncpoint.load(std::memory_order_relaxed) >= expected_value) { + action(); + return {}; + } + auto it = action_storage.begin(); + while (it != action_storage.end()) { + if (it->expected_value >= expected_value) { + break; + } + ++it; + } + return action_storage.emplace(it, expected_value, action); +} + +void SyncpointManager::DeregisterAction(std::list<RegisteredAction>& action_storage, + ActionHandle& handle) { + std::unique_lock<std::mutex> lk(guard); + action_storage.erase(handle); +} + +void SyncpointManager::DeregisterGuestAction(u32 syncpoint_id, ActionHandle& handle) { + DeregisterAction(guest_action_storage[syncpoint_id], handle); +} + +void SyncpointManager::DeregisterHostAction(u32 syncpoint_id, ActionHandle& handle) { + DeregisterAction(host_action_storage[syncpoint_id], handle); +} + +void SyncpointManager::IncrementGuest(u32 syncpoint_id) { + Increment(syncpoints_guest[syncpoint_id], wait_guest_cv, guest_action_storage[syncpoint_id]); +} + +void SyncpointManager::IncrementHost(u32 syncpoint_id) { + Increment(syncpoints_host[syncpoint_id], wait_host_cv, host_action_storage[syncpoint_id]); +} + +void SyncpointManager::WaitGuest(u32 syncpoint_id, u32 expected_value) { + Wait(syncpoints_guest[syncpoint_id], wait_guest_cv, expected_value); +} + +void SyncpointManager::WaitHost(u32 syncpoint_id, u32 expected_value) { + Wait(syncpoints_host[syncpoint_id], wait_host_cv, expected_value); +} + +void SyncpointManager::Increment(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv, + std::list<RegisteredAction>& 
action_storage) { + auto new_value{syncpoint.fetch_add(1, std::memory_order_acq_rel) + 1}; + + std::unique_lock<std::mutex> lk(guard); + auto it = action_storage.begin(); + while (it != action_storage.end()) { + if (it->expected_value > new_value) { + break; + } + it->action(); + it = action_storage.erase(it); + } + wait_cv.notify_all(); +} + +void SyncpointManager::Wait(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv, + u32 expected_value) { + const auto pred = [&]() { return syncpoint.load(std::memory_order_acquire) >= expected_value; }; + if (pred()) { + return; + } + + std::unique_lock<std::mutex> lk(guard); + wait_cv.wait(lk, pred); +} + +} // namespace Host1x + +} // namespace Tegra diff --git a/src/video_core/host1x/syncpoint_manager.h b/src/video_core/host1x/syncpoint_manager.h new file mode 100644 index 000000000..0ecc040ab --- /dev/null +++ b/src/video_core/host1x/syncpoint_manager.h @@ -0,0 +1,99 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv3 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include <array> +#include <atomic> +#include <condition_variable> +#include <functional> +#include <list> +#include <mutex> + +#include "common/common_types.h" + +namespace Tegra { + +namespace Host1x { + +class SyncpointManager { +public: + u32 GetGuestSyncpointValue(u32 id) { + return syncpoints_guest[id].load(std::memory_order_acquire); + } + + u32 GetHostSyncpointValue(u32 id) { + return syncpoints_host[id].load(std::memory_order_acquire); + } + + struct RegisteredAction { + RegisteredAction(u32 expected_value_, std::function<void(void)>& action_) + : expected_value{expected_value_}, action{action_} {} + u32 expected_value; + std::function<void(void)> action; + }; + using ActionHandle = std::list<RegisteredAction>::iterator; + + template <typename Func> + ActionHandle RegisterGuestAction(u32 syncpoint_id, u32 expected_value, Func&& action) { + std::function<void(void)> func(action); + return RegisterAction(syncpoints_guest[syncpoint_id], guest_action_storage[syncpoint_id], + expected_value, func); + } + + template <typename Func> + ActionHandle RegisterHostAction(u32 syncpoint_id, u32 expected_value, Func&& action) { + std::function<void(void)> func(action); + return RegisterAction(syncpoints_host[syncpoint_id], host_action_storage[syncpoint_id], + expected_value, func); + } + + void DeregisterGuestAction(u32 syncpoint_id,ActionHandle& handle); + + void DeregisterHostAction(u32 syncpoint_id,ActionHandle& handle); + + void IncrementGuest(u32 syncpoint_id); + + void IncrementHost(u32 syncpoint_id); + + void WaitGuest(u32 syncpoint_id, u32 expected_value); + + void WaitHost(u32 syncpoint_id, u32 expected_value); + + bool IsReadyGuest(u32 syncpoint_id, u32 expected_value) { + return syncpoints_guest[syncpoint_id].load(std::memory_order_acquire) >= expected_value; + } + + bool IsReadyHost(u32 syncpoint_id, u32 expected_value) { + return syncpoints_host[syncpoint_id].load(std::memory_order_acquire) >= expected_value; + } + +private: + void 
Increment(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv, + std::list<RegisteredAction>& action_storage); + + ActionHandle RegisterAction(std::atomic<u32>& syncpoint, + std::list<RegisteredAction>& action_storage, u32 expected_value, + std::function<void(void)>& action); + + void DeregisterAction(std::list<RegisteredAction>& action_storage, ActionHandle& handle); + + void Wait(std::atomic<u32>& syncpoint, std::condition_variable& wait_cv, u32 expected_value); + + static constexpr size_t NUM_MAX_SYNCPOINTS = 192; + + std::array<std::atomic<u32>, NUM_MAX_SYNCPOINTS> syncpoints_guest{}; + std::array<std::atomic<u32>, NUM_MAX_SYNCPOINTS> syncpoints_host{}; + + std::array<std::list<RegisteredAction>, NUM_MAX_SYNCPOINTS> guest_action_storage; + std::array<std::list<RegisteredAction>, NUM_MAX_SYNCPOINTS> host_action_storage; + + std::mutex guard; + std::condition_variable wait_guest_cv; + std::condition_variable wait_host_cv; +}; + +} // namespace Host1x + +} // namespace Tegra diff --git a/src/video_core/command_classes/vic.cpp b/src/video_core/host1x/vic.cpp index 7c17df353..a9422670a 100644 --- a/src/video_core/command_classes/vic.cpp +++ b/src/video_core/host1x/vic.cpp @@ -18,14 +18,17 @@ extern "C" { #include "common/bit_field.h" #include "common/logging/log.h" -#include "video_core/command_classes/nvdec.h" -#include "video_core/command_classes/vic.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/gpu.h" +#include "video_core/host1x/nvdec.h" +#include "video_core/host1x/vic.h" #include "video_core/memory_manager.h" #include "video_core/textures/decoders.h" namespace Tegra { + +namespace Host1x { + namespace { enum class VideoPixelFormat : u64_le { RGBA8 = 0x1f, @@ -235,4 +238,6 @@ void Vic::WriteYUVFrame(const AVFrame* frame, const VicConfig& config) { chroma_buffer.size()); } +} // namespace Host1x + } // namespace Tegra diff --git a/src/video_core/command_classes/vic.h b/src/video_core/host1x/vic.h index 010daa6b6..c51f8af7e 100644 
--- a/src/video_core/command_classes/vic.h +++ b/src/video_core/host1x/vic.h @@ -11,6 +11,9 @@ struct SwsContext; namespace Tegra { class GPU; + +namespace Host1x { + class Nvdec; union VicConfig; @@ -40,7 +43,7 @@ private: void WriteYUVFrame(const AVFrame* frame, const VicConfig& config); GPU& gpu; - std::shared_ptr<Tegra::Nvdec> nvdec_processor; + std::shared_ptr<Tegra::Host1x::Nvdec> nvdec_processor; /// Avoid reallocation of the following buffers every frame, as their /// size does not change during a stream @@ -58,4 +61,6 @@ private: s32 scaler_height{}; }; +} // namespace Host1x + } // namespace Tegra |