summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: bunnei <bunneidev@gmail.com> 2019-10-16 16:34:48 +0200
committer: GitHub <noreply@github.com> 2019-10-16 16:34:48 +0200
commit: ef9b31783d2c6af6ec21e5a8a4a9de4e340295c7 (patch)
tree: ccf16ff57bd3213bd364c6d96e1e702fe9e961a0
parent: Merge pull request #2984 from lioncash/fallthrough2 (diff)
parent: AsyncGpu: Address Feedback (diff)
download: yuzu-ef9b31783d2c6af6ec21e5a8a4a9de4e340295c7.tar
yuzu-ef9b31783d2c6af6ec21e5a8a4a9de4e340295c7.tar.gz
yuzu-ef9b31783d2c6af6ec21e5a8a4a9de4e340295c7.tar.bz2
yuzu-ef9b31783d2c6af6ec21e5a8a4a9de4e340295c7.tar.lz
yuzu-ef9b31783d2c6af6ec21e5a8a4a9de4e340295c7.tar.xz
yuzu-ef9b31783d2c6af6ec21e5a8a4a9de4e340295c7.tar.zst
yuzu-ef9b31783d2c6af6ec21e5a8a4a9de4e340295c7.zip
-rw-r--r-- src/core/core.cpp 2
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp 4
-rw-r--r-- src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp 33
-rw-r--r-- src/core/hle/service/nvdrv/interface.cpp 4
-rw-r--r-- src/core/hle/service/nvdrv/nvdrv.cpp 4
-rw-r--r-- src/core/hle/service/nvflinger/nvflinger.cpp 10
-rw-r--r-- src/video_core/gpu.cpp 13
-rw-r--r-- src/video_core/gpu.h 6
-rw-r--r-- src/video_core/gpu_asynch.cpp 4
-rw-r--r-- src/video_core/gpu_asynch.h 1
-rw-r--r-- src/video_core/gpu_synch.h 1
-rw-r--r-- src/video_core/gpu_thread.cpp 19
-rw-r--r-- src/video_core/gpu_thread.h 9
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 1
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 3
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp 5
16 files changed, 67 insertions, 52 deletions
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 4d0ac72a5..ddc767e30 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -256,6 +256,8 @@ struct System::Impl {
is_powered_on = false;
exit_lock = false;
+ gpu_core->WaitIdle();
+
// Shutdown emulation session
renderer.reset();
GDBStub::Shutdown();
diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
index f764388bc..3f7b8e670 100644
--- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp
@@ -5,6 +5,7 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
+#include "core/core_timing.h"
#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
#include "core/hle/service/nvdrv/devices/nvmap.h"
#include "core/perf_stats.h"
@@ -38,7 +39,10 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
transform, crop_rect};
system.GetPerfStats().EndGameFrame();
+ system.GetPerfStats().EndSystemFrame();
system.GPU().SwapBuffers(&framebuffer);
+ system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs());
+ system.GetPerfStats().BeginSystemFrame();
}
} // namespace Service::Nvidia::Devices
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
index eb88fee1b..b27ee0502 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp
@@ -63,16 +63,26 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
return NvResult::BadParameter;
}
+ u32 event_id = params.value & 0x00FF;
+
+ if (event_id >= MaxNvEvents) {
+ std::memcpy(output.data(), &params, sizeof(params));
+ return NvResult::BadParameter;
+ }
+
+ auto event = events_interface.events[event_id];
auto& gpu = system.GPU();
// This is mostly to take into account unimplemented features. As synced
// gpu is always synced.
if (!gpu.IsAsync()) {
+ event.writable->Signal();
return NvResult::Success;
}
auto lock = gpu.LockSync();
const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id);
const s32 diff = current_syncpoint_value - params.threshold;
if (diff >= 0) {
+ event.writable->Signal();
params.value = current_syncpoint_value;
std::memcpy(output.data(), &params, sizeof(params));
return NvResult::Success;
@@ -88,27 +98,6 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
return NvResult::Timeout;
}
- u32 event_id;
- if (is_async) {
- event_id = params.value & 0x00FF;
- if (event_id >= MaxNvEvents) {
- std::memcpy(output.data(), &params, sizeof(params));
- return NvResult::BadParameter;
- }
- } else {
- if (ctrl.fresh_call) {
- const auto result = events_interface.GetFreeEvent();
- if (result) {
- event_id = *result;
- } else {
- LOG_CRITICAL(Service_NVDRV, "No Free Events available!");
- event_id = params.value & 0x00FF;
- }
- } else {
- event_id = ctrl.event_id;
- }
- }
-
EventState status = events_interface.status[event_id];
if (event_id < MaxNvEvents || status == EventState::Free || status == EventState::Registered) {
events_interface.SetEventStatus(event_id, EventState::Waiting);
@@ -120,7 +109,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector<u8>&
params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;
}
params.value |= event_id;
- events_interface.events[event_id].writable->Clear();
+ event.writable->Clear();
gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);
if (!is_async && ctrl.fresh_call) {
ctrl.must_delay = true;
diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp
index 5e0c23602..68d139cfb 100644
--- a/src/core/hle/service/nvdrv/interface.cpp
+++ b/src/core/hle/service/nvdrv/interface.cpp
@@ -134,7 +134,9 @@ void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) {
IPC::ResponseBuilder rb{ctx, 3, 1};
rb.Push(RESULT_SUCCESS);
if (event_id < MaxNvEvents) {
- rb.PushCopyObjects(nvdrv->GetEvent(event_id));
+ auto event = nvdrv->GetEvent(event_id);
+ event->Clear();
+ rb.PushCopyObjects(event);
rb.Push<u32>(NvResult::Success);
} else {
rb.Push<u32>(0);
diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp
index 307a7e928..7bfb99e34 100644
--- a/src/core/hle/service/nvdrv/nvdrv.cpp
+++ b/src/core/hle/service/nvdrv/nvdrv.cpp
@@ -40,8 +40,8 @@ Module::Module(Core::System& system) {
auto& kernel = system.Kernel();
for (u32 i = 0; i < MaxNvEvents; i++) {
std::string event_label = fmt::format("NVDRV::NvEvent_{}", i);
- events_interface.events[i] = Kernel::WritableEvent::CreateEventPair(
- kernel, Kernel::ResetType::Automatic, event_label);
+ events_interface.events[i] =
+ Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, event_label);
events_interface.status[i] = EventState::Free;
events_interface.registered[i] = false;
}
diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp
index 368f83f6c..cc9522aad 100644
--- a/src/core/hle/service/nvflinger/nvflinger.cpp
+++ b/src/core/hle/service/nvflinger/nvflinger.cpp
@@ -187,14 +187,18 @@ void NVFlinger::Compose() {
MicroProfileFlip();
if (!buffer) {
- // There was no queued buffer to draw, render previous frame
- system.GetPerfStats().EndGameFrame();
- system.GPU().SwapBuffers({});
continue;
}
const auto& igbp_buffer = buffer->get().igbp_buffer;
+ const auto& gpu = system.GPU();
+ const auto& multi_fence = buffer->get().multi_fence;
+ for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
+ const auto& fence = multi_fence.fences[fence_id];
+ gpu.WaitFence(fence.id, fence.value);
+ }
+
// Now send the buffer to the GPU for drawing.
// TODO(Subv): Support more than just disp0. The display device selection is probably based
// on which display we're drawing (Default, Internal, External, etc)
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index fbb9105d6..095660115 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include "common/assert.h"
+#include "common/microprofile.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/memory.h"
@@ -17,6 +18,8 @@
namespace Tegra {
+MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
+
GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
: system{system}, renderer{renderer}, is_async{is_async} {
auto& rasterizer{renderer.Rasterizer()};
@@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const {
return *dma_pusher;
}
+void GPU::WaitFence(u32 syncpoint_id, u32 value) const {
+ // Synced GPU, is always in sync
+ if (!is_async) {
+ return;
+ }
+ MICROPROFILE_SCOPE(GPU_wait);
+ while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) {
+ }
+}
+
void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
syncpoints[syncpoint_id]++;
std::lock_guard lock{sync_mutex};
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 29fa8e95b..dbca19f35 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -177,6 +177,12 @@ public:
/// Returns a reference to the GPU DMA pusher.
Tegra::DmaPusher& DmaPusher();
+ // Waits for the GPU to finish working
+ virtual void WaitIdle() const = 0;
+
+ /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
+ void WaitFence(u32 syncpoint_id, u32 value) const;
+
void IncrementSyncPoint(u32 syncpoint_id);
u32 GetSyncpointValue(u32 syncpoint_id) const;
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index f2a3a390e..04222d060 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -44,4 +44,8 @@ void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) con
interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
}
+void GPUAsynch::WaitIdle() const {
+ gpu_thread.WaitIdle();
+}
+
} // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index a12f9bac4..1241ade1d 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -25,6 +25,7 @@ public:
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+ void WaitIdle() const override;
protected:
void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 5eb1c461c..c71baee89 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -24,6 +24,7 @@ public:
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+ void WaitIdle() const override {}
protected:
void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 5f039e4fd..758a37f14 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -5,8 +5,6 @@
#include "common/assert.h"
#include "common/microprofile.h"
#include "core/core.h"
-#include "core/core_timing.h"
-#include "core/core_timing_util.h"
#include "core/frontend/scope_acquire_window_context.h"
#include "video_core/dma_pusher.h"
#include "video_core/gpu.h"
@@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() {
void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) {
thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)};
- synchronization_event = system.CoreTiming().RegisterEvent(
- "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); });
}
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
- const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))};
- const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})};
- system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence);
+ PushCommand(SubmitListCommand(std::move(entries)));
}
void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
@@ -96,16 +90,15 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
InvalidateRegion(addr, size);
}
+void ThreadManager::WaitIdle() const {
+ while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) {
+ }
+}
+
u64 ThreadManager::PushCommand(CommandData&& command_data) {
const u64 fence{++state.last_fence};
state.queue.Push(CommandDataContainer(std::move(command_data), fence));
return fence;
}
-MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
-void SynchState::WaitForSynchronization(u64 fence) {
- while (signaled_fence.load() < fence)
- ;
-}
-
} // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 3ae0ec9f3..08dc96bb3 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -21,9 +21,6 @@ class DmaPusher;
namespace Core {
class System;
-namespace Timing {
-struct EventType;
-} // namespace Timing
} // namespace Core
namespace VideoCommon::GPUThread {
@@ -89,8 +86,6 @@ struct CommandDataContainer {
struct SynchState final {
std::atomic_bool is_running{true};
- void WaitForSynchronization(u64 fence);
-
using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
CommandQueue queue;
u64 last_fence{};
@@ -121,6 +116,9 @@ public:
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
+ // Wait until the gpu thread is idle.
+ void WaitIdle() const;
+
private:
/// Pushes a command to be executed by the GPU thread
u64 PushCommand(CommandData&& command_data);
@@ -128,7 +126,6 @@ private:
private:
SynchState state;
Core::System& system;
- Core::Timing::EventType* synchronization_event{};
std::thread thread;
std::thread::id thread_id;
};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index a85f730a8..cbcf81414 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -348,6 +348,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
}
void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
+ std::lock_guard lock{pages_mutex};
const u64 page_start{addr >> Memory::PAGE_BITS};
const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 9c10ebda3..c24a02d71 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -9,6 +9,7 @@
#include <cstddef>
#include <map>
#include <memory>
+#include <mutex>
#include <optional>
#include <tuple>
#include <utility>
@@ -230,6 +231,8 @@ private:
using CachedPageMap = boost::icl::interval_map<u64, int>;
CachedPageMap cached_pages;
+
+ std::mutex pages_mutex;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 1e6ef66ab..4bbd17b12 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -102,8 +102,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst
RendererOpenGL::~RendererOpenGL() = default;
void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
- system.GetPerfStats().EndSystemFrame();
-
// Maintain the rasterizer's state as a priority
OpenGLState prev_state = OpenGLState::GetCurState();
state.AllDirty();
@@ -135,9 +133,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
render_window.PollEvents();
- system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs());
- system.GetPerfStats().BeginSystemFrame();
-
// Restore the rasterizer state
prev_state.AllDirty();
prev_state.Apply();