summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
authorFernando Sahmkow <fsahmkow27@gmail.com>2021-11-05 15:52:31 +0100
committerFernando Sahmkow <fsahmkow27@gmail.com>2022-10-06 21:00:51 +0200
commit139ea93512aeead8a4aee3910a3de86eb109a838 (patch)
tree857643fc08617b7035656a51728c399f30c8c2cb /src/video_core
parentNVASGPU: Fix Remap. (diff)
downloadyuzu-139ea93512aeead8a4aee3910a3de86eb109a838.tar
yuzu-139ea93512aeead8a4aee3910a3de86eb109a838.tar.gz
yuzu-139ea93512aeead8a4aee3910a3de86eb109a838.tar.bz2
yuzu-139ea93512aeead8a4aee3910a3de86eb109a838.tar.lz
yuzu-139ea93512aeead8a4aee3910a3de86eb109a838.tar.xz
yuzu-139ea93512aeead8a4aee3910a3de86eb109a838.tar.zst
yuzu-139ea93512aeead8a4aee3910a3de86eb109a838.zip
Diffstat (limited to '')
-rw-r--r--src/video_core/CMakeLists.txt8
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h103
-rw-r--r--src/video_core/control/channel_state.cpp44
-rw-r--r--src/video_core/control/channel_state.h69
-rw-r--r--src/video_core/control/channel_state_cache.cpp5
-rw-r--r--src/video_core/control/channel_state_cache.h68
-rw-r--r--src/video_core/control/channel_state_cache.inc64
-rw-r--r--src/video_core/control/scheduler.cpp31
-rw-r--r--src/video_core/control/scheduler.h38
-rw-r--r--src/video_core/dma_pusher.cpp23
-rw-r--r--src/video_core/dma_pusher.h13
-rw-r--r--src/video_core/engines/puller.cpp297
-rw-r--r--src/video_core/engines/puller.h179
-rw-r--r--src/video_core/fence_manager.h28
-rw-r--r--src/video_core/gpu.cpp468
-rw-r--r--src/video_core/gpu.h55
-rw-r--r--src/video_core/gpu_thread.cpp14
-rw-r--r--src/video_core/gpu_thread.h12
-rw-r--r--src/video_core/memory_manager.cpp5
-rw-r--r--src/video_core/query_cache.h18
-rw-r--r--src/video_core/rasterizer_interface.h9
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.h4
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.cpp5
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.h3
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp14
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp39
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h9
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp17
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.cpp4
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp28
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h6
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.cpp7
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp87
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h20
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.cpp13
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.h22
-rw-r--r--src/video_core/shader_cache.cpp33
-rw-r--r--src/video_core/shader_cache.h15
-rw-r--r--src/video_core/texture_cache/image_base.h3
-rw-r--r--src/video_core/texture_cache/texture_cache.h209
-rw-r--r--src/video_core/texture_cache/texture_cache_base.h73
44 files changed, 1396 insertions, 779 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 5b3808351..e216c51a2 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -35,6 +35,12 @@ add_library(video_core STATIC
command_classes/vic.h
compatible_formats.cpp
compatible_formats.h
+ control/channel_state.cpp
+ control/channel_state.h
+ control/channel_state_cache.cpp
+ control/channel_state_cache.h
+ control/scheduler.cpp
+ control/scheduler.h
delayed_destruction_ring.h
dirty_flags.cpp
dirty_flags.h
@@ -54,6 +60,8 @@ add_library(video_core STATIC
engines/maxwell_3d.h
engines/maxwell_dma.cpp
engines/maxwell_dma.h
+ engines/puller.cpp
+ engines/puller.h
framebuffer_config.h
macro/macro.cpp
macro/macro.h
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index f015dae56..6b6764d72 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -5,7 +5,6 @@
#include <algorithm>
#include <array>
-#include <deque>
#include <memory>
#include <mutex>
#include <numeric>
@@ -23,6 +22,7 @@
#include "common/settings.h"
#include "core/memory.h"
#include "video_core/buffer_cache/buffer_base.h"
+#include "video_core/control/channel_state_cache.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/kepler_compute.h"
@@ -56,7 +56,7 @@ using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFE
using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
template <typename P>
-class BufferCache {
+class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
// Page size for caching purposes.
// This is unrelated to the CPU page size and it can be changed as it seems optimal.
@@ -116,10 +116,7 @@ public:
static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
- Runtime& runtime_);
+ Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
void TickFrame();
@@ -367,9 +364,6 @@ private:
void ClearDownload(IntervalType subtract_interval);
VideoCore::RasterizerInterface& rasterizer;
- Tegra::Engines::Maxwell3D& maxwell3d;
- Tegra::Engines::KeplerCompute& kepler_compute;
- Tegra::MemoryManager& gpu_memory;
Core::Memory::Memory& cpu_memory;
SlotVector<Buffer> slot_buffers;
@@ -444,12 +438,8 @@ private:
template <class P>
BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
- Runtime& runtime_)
- : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
- kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
+ Core::Memory::Memory& cpu_memory_, Runtime& runtime_)
+ : runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_} {
// Ensure the first slot is used for the null buffer
void(slot_buffers.insert(runtime, NullBufferParams{}));
common_ranges.clear();
@@ -552,8 +542,8 @@ void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
template <class P>
bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
- const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address);
- const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address);
+ const std::optional<VAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address);
+ const std::optional<VAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address);
if (!cpu_src_address || !cpu_dest_address) {
return false;
}
@@ -611,7 +601,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
template <class P>
bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
- const std::optional<VAddr> cpu_dst_address = gpu_memory.GpuToCpuAddress(dst_address);
+ const std::optional<VAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address);
if (!cpu_dst_address) {
return false;
}
@@ -635,7 +625,7 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
template <class P>
void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
u32 size) {
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
const Binding binding{
.cpu_addr = *cpu_addr,
.size = size,
@@ -673,7 +663,7 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
if (is_indexed) {
BindHostIndexBuffer();
} else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count);
}
@@ -733,7 +723,7 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index,
enabled_storage_buffers[stage] |= 1U << ssbo_index;
written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
- const auto& cbufs = maxwell3d.state.shader_stages[stage];
+ const auto& cbufs = maxwell3d->state.shader_stages[stage];
const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr);
}
@@ -770,7 +760,7 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
enabled_compute_storage_buffers |= 1U << ssbo_index;
written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
- const auto& launch_desc = kepler_compute.launch_description;
+ const auto& launch_desc = kepler_compute->launch_description;
ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
const auto& cbufs = launch_desc.const_buffer_config;
@@ -991,19 +981,19 @@ void BufferCache<P>::BindHostIndexBuffer() {
const u32 size = index_buffer.size;
SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
- const u32 new_offset = offset + maxwell3d.regs.index_array.first *
- maxwell3d.regs.index_array.FormatSizeInBytes();
+ const u32 new_offset = offset + maxwell3d->regs.index_array.first *
+ maxwell3d->regs.index_array.FormatSizeInBytes();
runtime.BindIndexBuffer(buffer, new_offset, size);
} else {
- runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
- maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
- buffer, offset, size);
+ runtime.BindIndexBuffer(maxwell3d->regs.draw.topology, maxwell3d->regs.index_array.format,
+ maxwell3d->regs.index_array.first,
+ maxwell3d->regs.index_array.count, buffer, offset, size);
}
}
template <class P>
void BufferCache<P>::BindHostVertexBuffers() {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
const Binding& binding = vertex_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
@@ -1014,7 +1004,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
}
flags[Dirty::VertexBuffer0 + index] = false;
- const u32 stride = maxwell3d.regs.vertex_array[index].stride;
+ const u32 stride = maxwell3d->regs.vertex_array[index].stride;
const u32 offset = buffer.Offset(binding.cpu_addr);
runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride);
}
@@ -1154,7 +1144,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
template <class P>
void BufferCache<P>::BindHostTransformFeedbackBuffers() {
- if (maxwell3d.regs.tfb_enabled == 0) {
+ if (maxwell3d->regs.tfb_enabled == 0) {
return;
}
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@@ -1262,8 +1252,8 @@ template <class P>
void BufferCache<P>::UpdateIndexBuffer() {
// We have to check for the dirty flags and index count
// The index count is currently changed without updating the dirty flags
- const auto& index_array = maxwell3d.regs.index_array;
- auto& flags = maxwell3d.dirty.flags;
+ const auto& index_array = maxwell3d->regs.index_array;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
return;
}
@@ -1272,7 +1262,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
const GPUVAddr gpu_addr_begin = index_array.StartAddress();
const GPUVAddr gpu_addr_end = index_array.EndAddress();
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
+ const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes();
const u32 size = std::min(address_size, draw_size);
@@ -1289,8 +1279,8 @@ void BufferCache<P>::UpdateIndexBuffer() {
template <class P>
void BufferCache<P>::UpdateVertexBuffers() {
- auto& flags = maxwell3d.dirty.flags;
- if (!maxwell3d.dirty.flags[Dirty::VertexBuffers]) {
+ auto& flags = maxwell3d->dirty.flags;
+ if (!maxwell3d->dirty.flags[Dirty::VertexBuffers]) {
return;
}
flags[Dirty::VertexBuffers] = false;
@@ -1302,28 +1292,15 @@ void BufferCache<P>::UpdateVertexBuffers() {
template <class P>
void BufferCache<P>::UpdateVertexBuffer(u32 index) {
- if (!maxwell3d.dirty.flags[Dirty::VertexBuffer0 + index]) {
+ if (!maxwell3d->dirty.flags[Dirty::VertexBuffer0 + index]) {
return;
}
- const auto& array = maxwell3d.regs.vertex_array[index];
- const auto& limit = maxwell3d.regs.vertex_array_limit[index];
+ const auto& array = maxwell3d->regs.vertex_array[index];
+ const auto& limit = maxwell3d->regs.vertex_array_limit[index];
const GPUVAddr gpu_addr_begin = array.StartAddress();
const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1;
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
- u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
- if (address_size >= 64_MiB) {
- // Reported vertex buffer size is very large, cap to mapped buffer size
- GPUVAddr submapped_addr_end = gpu_addr_begin;
-
- const auto ranges{gpu_memory.GetSubmappedRange(gpu_addr_begin, address_size)};
- if (ranges.size() > 0) {
- const auto& [addr, size] = *ranges.begin();
- submapped_addr_end = addr + size;
- }
-
- address_size =
- std::min(address_size, static_cast<u32>(submapped_addr_end - gpu_addr_begin));
- }
+ const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
+ const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
const u32 size = address_size; // TODO: Analyze stride and number of vertices
if (array.enable == 0 || size == 0 || !cpu_addr) {
vertex_buffers[index] = NULL_BINDING;
@@ -1382,7 +1359,7 @@ void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
template <class P>
void BufferCache<P>::UpdateTransformFeedbackBuffers() {
- if (maxwell3d.regs.tfb_enabled == 0) {
+ if (maxwell3d->regs.tfb_enabled == 0) {
return;
}
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
@@ -1392,10 +1369,10 @@ void BufferCache<P>::UpdateTransformFeedbackBuffers() {
template <class P>
void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
- const auto& binding = maxwell3d.regs.tfb_bindings[index];
+ const auto& binding = maxwell3d->regs.tfb_bindings[index];
const GPUVAddr gpu_addr = binding.Address() + binding.buffer_offset;
const u32 size = binding.buffer_size;
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
if (binding.buffer_enable == 0 || size == 0 || !cpu_addr) {
transform_feedback_buffers[index] = NULL_BINDING;
return;
@@ -1414,10 +1391,10 @@ void BufferCache<P>::UpdateComputeUniformBuffers() {
ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
Binding& binding = compute_uniform_buffers[index];
binding = NULL_BINDING;
- const auto& launch_desc = kepler_compute.launch_description;
+ const auto& launch_desc = kepler_compute->launch_description;
if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
const auto& cbuf = launch_desc.const_buffer_config[index];
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(cbuf.Address());
+ const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(cbuf.Address());
if (cpu_addr) {
binding.cpu_addr = *cpu_addr;
binding.size = cbuf.size;
@@ -1831,7 +1808,7 @@ void BufferCache<P>::NotifyBufferDeletion() {
dirty_uniform_buffers.fill(~u32{0});
uniform_buffer_binding_sizes.fill({});
}
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
flags[Dirty::IndexBuffer] = true;
flags[Dirty::VertexBuffers] = true;
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
@@ -1842,9 +1819,9 @@ void BufferCache<P>::NotifyBufferDeletion() {
template <class P>
typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr) const {
- const GPUVAddr gpu_addr = gpu_memory.Read<u64>(ssbo_addr);
- const u32 size = gpu_memory.Read<u32>(ssbo_addr + 8);
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
+ const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
+ const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
if (!cpu_addr || size == 0) {
return NULL_BINDING;
}
@@ -1859,7 +1836,7 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
template <class P>
typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(
GPUVAddr gpu_addr, u32 size, PixelFormat format) {
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
TextureBufferBinding binding;
if (!cpu_addr || size == 0) {
binding.cpu_addr = 0;
diff --git a/src/video_core/control/channel_state.cpp b/src/video_core/control/channel_state.cpp
new file mode 100644
index 000000000..67803fe94
--- /dev/null
+++ b/src/video_core/control/channel_state.cpp
@@ -0,0 +1,44 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/dma_pusher.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/kepler_memory.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_dma.h"
+#include "video_core/engines/puller.h"
+#include "video_core/memory_manager.h"
+
+namespace Tegra::Control {
+
+ChannelState::ChannelState(s32 bind_id_) {
+ bind_id = bind_id_;
+ initiated = false;
+}
+
+void ChannelState::Init(Core::System& system, GPU& gpu) {
+ ASSERT(memory_manager);
+ dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this);
+ maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager);
+ fermi_2d = std::make_unique<Engines::Fermi2D>();
+ kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager);
+ maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
+ kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
+ initiated = true;
+}
+
+void ChannelState::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
+ dma_pusher->BindRasterizer(rasterizer);
+ memory_manager->BindRasterizer(rasterizer);
+ maxwell_3d->BindRasterizer(rasterizer);
+ fermi_2d->BindRasterizer(rasterizer);
+ kepler_memory->BindRasterizer(rasterizer);
+ kepler_compute->BindRasterizer(rasterizer);
+ maxwell_dma->BindRasterizer(rasterizer);
+}
+
+} // namespace Tegra::Control
diff --git a/src/video_core/control/channel_state.h b/src/video_core/control/channel_state.h
new file mode 100644
index 000000000..82808a6b8
--- /dev/null
+++ b/src/video_core/control/channel_state.h
@@ -0,0 +1,69 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+
+#include "common/common_types.h"
+
+namespace Core {
+class System;
+}
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Tegra {
+
+class GPU;
+
+namespace Engines {
+class Puller;
+class Fermi2D;
+class Maxwell3D;
+class MaxwellDMA;
+class KeplerCompute;
+class KeplerMemory;
+} // namespace Engines
+
+class MemoryManager;
+class DmaPusher;
+
+namespace Control {
+
+struct ChannelState {
+ ChannelState(s32 bind_id);
+ ChannelState(const ChannelState& state) = delete;
+ ChannelState& operator=(const ChannelState&) = delete;
+ ChannelState(ChannelState&& other) noexcept = default;
+ ChannelState& operator=(ChannelState&& other) noexcept = default;
+
+ void Init(Core::System& system, GPU& gpu);
+
+ void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
+ s32 bind_id = -1;
+ /// 3D engine
+ std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
+ /// 2D engine
+ std::unique_ptr<Engines::Fermi2D> fermi_2d;
+ /// Compute engine
+ std::unique_ptr<Engines::KeplerCompute> kepler_compute;
+ /// DMA engine
+ std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
+ /// Inline memory engine
+ std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+
+ std::shared_ptr<MemoryManager> memory_manager;
+
+ std::unique_ptr<DmaPusher> dma_pusher;
+
+ bool initiated{};
+};
+
+} // namespace Control
+
+} // namespace Tegra
diff --git a/src/video_core/control/channel_state_cache.cpp b/src/video_core/control/channel_state_cache.cpp
new file mode 100644
index 000000000..f72a97b2f
--- /dev/null
+++ b/src/video_core/control/channel_state_cache.cpp
@@ -0,0 +1,5 @@
+#include "video_core/control/channel_state_cache.inc"
+
+namespace VideoCommon {
+template class VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>;
+}
diff --git a/src/video_core/control/channel_state_cache.h b/src/video_core/control/channel_state_cache.h
new file mode 100644
index 000000000..c8298c003
--- /dev/null
+++ b/src/video_core/control/channel_state_cache.h
@@ -0,0 +1,68 @@
+#pragma once
+
+#include <deque>
+#include <limits>
+#include <unordered_map>
+
+#include "common/common_types.h"
+
+namespace Tegra {
+
+namespace Engines {
+class Maxwell3D;
+class KeplerCompute;
+} // namespace Engines
+
+class MemoryManager;
+
+namespace Control {
+struct ChannelState;
+}
+
+} // namespace Tegra
+
+namespace VideoCommon {
+
+class ChannelInfo {
+public:
+ ChannelInfo() = delete;
+ ChannelInfo(Tegra::Control::ChannelState& state);
+ ChannelInfo(const ChannelInfo& state) = delete;
+ ChannelInfo& operator=(const ChannelInfo&) = delete;
+ ChannelInfo(ChannelInfo&& other) = default;
+ ChannelInfo& operator=(ChannelInfo&& other) = default;
+
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+ Tegra::MemoryManager& gpu_memory;
+};
+
+template <class P>
+class ChannelSetupCaches {
+public:
+ /// Operations for seting the channel of execution.
+
+ /// Create channel state.
+ void CreateChannel(Tegra::Control::ChannelState& channel);
+
+ /// Bind a channel for execution.
+ void BindToChannel(s32 id);
+
+ /// Erase channel's state.
+ void EraseChannel(s32 id);
+
+protected:
+ static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
+
+ std::deque<P> channel_storage;
+ std::deque<size_t> free_channel_ids;
+ std::unordered_map<s32, size_t> channel_map;
+
+ P* channel_state;
+ size_t current_channel_id{UNSET_CHANNEL};
+ Tegra::Engines::Maxwell3D* maxwell3d;
+ Tegra::Engines::KeplerCompute* kepler_compute;
+ Tegra::MemoryManager* gpu_memory;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/control/channel_state_cache.inc b/src/video_core/control/channel_state_cache.inc
new file mode 100644
index 000000000..3eb73af9f
--- /dev/null
+++ b/src/video_core/control/channel_state_cache.inc
@@ -0,0 +1,64 @@
+#include "video_core/control/channel_state.h"
+#include "video_core/control/channel_state_cache.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+
+namespace VideoCommon {
+
+ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& channel_state)
+ : maxwell3d{*channel_state.maxwell_3d}, kepler_compute{*channel_state.kepler_compute},
+ gpu_memory{*channel_state.memory_manager} {}
+
+template <class P>
+void ChannelSetupCaches<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
+ ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0);
+ auto new_id = [this, &channel]() {
+ if (!free_channel_ids.empty()) {
+ auto id = free_channel_ids.front();
+ free_channel_ids.pop_front();
+ new (&channel_storage[id]) ChannelInfo(channel);
+ return id;
+ }
+ channel_storage.emplace_back(channel);
+ return channel_storage.size() - 1;
+ }();
+ channel_map.emplace(channel.bind_id, new_id);
+ if (current_channel_id != UNSET_CHANNEL) {
+ channel_state = &channel_storage[current_channel_id];
+ }
+}
+
+/// Bind a channel for execution.
+template <class P>
+void ChannelSetupCaches<P>::BindToChannel(s32 id) {
+ auto it = channel_map.find(id);
+ ASSERT(it != channel_map.end() && id >= 0);
+ current_channel_id = it->second;
+ channel_state = &channel_storage[current_channel_id];
+ maxwell3d = &channel_state->maxwell3d;
+ kepler_compute = &channel_state->kepler_compute;
+ gpu_memory = &channel_state->gpu_memory;
+}
+
+/// Erase channel's channel_state.
+template <class P>
+void ChannelSetupCaches<P>::EraseChannel(s32 id) {
+ const auto it = channel_map.find(id);
+ ASSERT(it != channel_map.end() && id >= 0);
+ const auto this_id = it->second;
+ free_channel_ids.push_back(this_id);
+ channel_map.erase(it);
+ if (this_id == current_channel_id) {
+ current_channel_id = UNSET_CHANNEL;
+ channel_state = nullptr;
+ maxwell3d = nullptr;
+ kepler_compute = nullptr;
+ gpu_memory = nullptr;
+ } else if (current_channel_id != UNSET_CHANNEL) {
+ channel_state = &channel_storage[current_channel_id];
+ }
+}
+
+
+} // namespace VideoCommon
diff --git a/src/video_core/control/scheduler.cpp b/src/video_core/control/scheduler.cpp
new file mode 100644
index 000000000..e1abcb188
--- /dev/null
+++ b/src/video_core/control/scheduler.cpp
@@ -0,0 +1,31 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+
+#include "video_core/control/channel_state.h"
+#include "video_core/control/scheduler.h"
+#include "video_core/gpu.h"
+
+namespace Tegra::Control {
+Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {}
+
+Scheduler::~Scheduler() = default;
+
+void Scheduler::Push(s32 channel, CommandList&& entries) {
+ std::unique_lock<std::mutex> lk(scheduling_guard);
+ auto it = channels.find(channel);
+ auto channel_state = it->second;
+ gpu.BindChannel(channel_state->bind_id);
+ channel_state->dma_pusher->Push(std::move(entries));
+ channel_state->dma_pusher->DispatchCalls();
+}
+
+void Scheduler::DeclareChannel(std::shared_ptr<ChannelState> new_channel) {
+ s32 channel = new_channel->bind_id;
+ std::unique_lock<std::mutex> lk(scheduling_guard);
+ channels.emplace(channel, new_channel);
+}
+
+} // namespace Tegra::Control
diff --git a/src/video_core/control/scheduler.h b/src/video_core/control/scheduler.h
new file mode 100644
index 000000000..802e9caff
--- /dev/null
+++ b/src/video_core/control/scheduler.h
@@ -0,0 +1,38 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+
+#include "video_core/dma_pusher.h"
+
+namespace Tegra {
+
+class GPU;
+
+namespace Control {
+
+struct ChannelState;
+
+class Scheduler {
+public:
+ Scheduler(GPU& gpu_);
+ ~Scheduler();
+
+ void Push(s32 channel, CommandList&& entries);
+
+ void DeclareChannel(std::shared_ptr<ChannelState> new_channel);
+
+private:
+ std::unordered_map<s32, std::shared_ptr<ChannelState>> channels;
+ std::mutex scheduling_guard;
+ GPU& gpu;
+};
+
+} // namespace Control
+
+} // namespace Tegra
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 29b8582ab..b01f04d0c 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -12,7 +12,10 @@
namespace Tegra {
-DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_) : gpu{gpu_}, system{system_} {}
+DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
+ Control::ChannelState& channel_state_)
+ : gpu{gpu_}, system{system_}, memory_manager{memory_manager_}, puller{gpu_, memory_manager_,
+ *this, channel_state_} {}
DmaPusher::~DmaPusher() = default;
@@ -76,11 +79,11 @@ bool DmaPusher::Step() {
// Push buffer non-empty, read a word
command_headers.resize(command_list_header.size);
if (Settings::IsGPULevelHigh()) {
- gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
- command_list_header.size * sizeof(u32));
+ memory_manager.ReadBlock(dma_get, command_headers.data(),
+ command_list_header.size * sizeof(u32));
} else {
- gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
- command_list_header.size * sizeof(u32));
+ memory_manager.ReadBlockUnsafe(dma_get, command_headers.data(),
+ command_list_header.size * sizeof(u32));
}
}
for (std::size_t index = 0; index < command_headers.size();) {
@@ -154,7 +157,7 @@ void DmaPusher::SetState(const CommandHeader& command_header) {
void DmaPusher::CallMethod(u32 argument) const {
if (dma_state.method < non_puller_methods) {
- gpu.CallMethod(GPU::MethodCall{
+ puller.CallPullerMethod(Engines::Puller::MethodCall{
dma_state.method,
argument,
dma_state.subchannel,
@@ -168,12 +171,16 @@ void DmaPusher::CallMethod(u32 argument) const {
void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
if (dma_state.method < non_puller_methods) {
- gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
- dma_state.method_count);
+ puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
+ dma_state.method_count);
} else {
subchannels[dma_state.subchannel]->CallMultiMethod(dma_state.method, base_start,
num_methods, dma_state.method_count);
}
}
+void DmaPusher::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
+ puller.BindRasterizer(rasterizer);
+}
+
} // namespace Tegra
diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h
index 872fd146a..fd7c936c4 100644
--- a/src/video_core/dma_pusher.h
+++ b/src/video_core/dma_pusher.h
@@ -10,6 +10,7 @@
#include "common/bit_field.h"
#include "common/common_types.h"
#include "video_core/engines/engine_interface.h"
+#include "video_core/engines/puller.h"
namespace Core {
class System;
@@ -17,7 +18,12 @@ class System;
namespace Tegra {
+namespace Control {
+struct ChannelState;
+}
+
class GPU;
+class MemoryManager;
enum class SubmissionMode : u32 {
IncreasingOld = 0,
@@ -102,7 +108,8 @@ struct CommandList final {
*/
class DmaPusher final {
public:
- explicit DmaPusher(Core::System& system_, GPU& gpu_);
+ explicit DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
+ Control::ChannelState& channel_state_);
~DmaPusher();
void Push(CommandList&& entries) {
@@ -115,6 +122,8 @@ public:
subchannels[subchannel_id] = engine;
}
+ void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
private:
static constexpr u32 non_puller_methods = 0x40;
static constexpr u32 max_subchannels = 8;
@@ -148,6 +157,8 @@ private:
GPU& gpu;
Core::System& system;
+ MemoryManager& memory_manager;
+ mutable Engines::Puller puller;
};
} // namespace Tegra
diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp
new file mode 100644
index 000000000..37f2ced18
--- /dev/null
+++ b/src/video_core/engines/puller.cpp
@@ -0,0 +1,297 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/logging/log.h"
+#include "common/settings.h"
+#include "core/core.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/dma_pusher.h"
+#include "video_core/engines/fermi_2d.h"
+#include "video_core/engines/kepler_compute.h"
+#include "video_core/engines/kepler_memory.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/maxwell_dma.h"
+#include "video_core/engines/puller.h"
+#include "video_core/gpu.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace Tegra::Engines {
+
+Puller::Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher_,
+ Control::ChannelState& channel_state_)
+ : gpu{gpu_}, memory_manager{memory_manager_}, dma_pusher{dma_pusher_}, channel_state{
+ channel_state_} {}
+
+Puller::~Puller() = default;
+
+void Puller::ProcessBindMethod(const MethodCall& method_call) {
+ // Bind the current subchannel to the desired engine id.
+ LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
+ method_call.argument);
+ const auto engine_id = static_cast<EngineID>(method_call.argument);
+ bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
+ switch (engine_id) {
+ case EngineID::FERMI_TWOD_A:
+ dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel);
+ break;
+ case EngineID::MAXWELL_B:
+ dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel);
+ break;
+ case EngineID::KEPLER_COMPUTE_B:
+ dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel);
+ break;
+ case EngineID::MAXWELL_DMA_COPY_A:
+ dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel);
+ break;
+ case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+ dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel);
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
+ }
+}
+
+void Puller::ProcessFenceActionMethod() {
+ switch (regs.fence_action.op) {
+ case Puller::FenceOperation::Acquire:
+ // UNIMPLEMENTED_MSG("Channel Scheduling pending.");
+ // WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
+ break;
+ case Puller::FenceOperation::Increment:
+ rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id);
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
+ }
+}
+
+void Puller::ProcessWaitForInterruptMethod() {
+ // TODO(bunnei) ImplementMe
+ LOG_WARNING(HW_GPU, "(STUBBED) called");
+}
+
+void Puller::ProcessSemaphoreTriggerMethod() {
+ const auto semaphoreOperationMask = 0xF;
+ const auto op =
+ static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
+ if (op == GpuSemaphoreOperation::WriteLong) {
+ struct Block {
+ u32 sequence;
+ u32 zeros = 0;
+ u64 timestamp;
+ };
+
+ Block block{};
+ block.sequence = regs.semaphore_sequence;
+ // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
+ // CoreTiming
+ block.timestamp = gpu.GetTicks();
+ memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block));
+ } else {
+ const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
+ if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
+ (op == GpuSemaphoreOperation::AcquireGequal &&
+ static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
+ (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
+ // Nothing to do in this case
+ } else {
+ regs.acquire_source = true;
+ regs.acquire_value = regs.semaphore_sequence;
+ if (op == GpuSemaphoreOperation::AcquireEqual) {
+ regs.acquire_active = true;
+ regs.acquire_mode = false;
+ } else if (op == GpuSemaphoreOperation::AcquireGequal) {
+ regs.acquire_active = true;
+ regs.acquire_mode = true;
+ } else if (op == GpuSemaphoreOperation::AcquireMask) {
+ // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
+ // semaphore_sequence, gives a non-0 result
+ LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
+ } else {
+ LOG_ERROR(HW_GPU, "Invalid semaphore operation");
+ }
+ }
+ }
+}
+
+void Puller::ProcessSemaphoreRelease() {
+ memory_manager.Write<u32>(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release);
+}
+
+void Puller::ProcessSemaphoreAcquire() {
+ const u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
+ const auto value = regs.semaphore_acquire;
+ if (word != value) {
+ regs.acquire_active = true;
+ regs.acquire_value = value;
+ // TODO(kemathe73) figure out how to do the acquire_timeout
+ regs.acquire_mode = false;
+ regs.acquire_source = false;
+ }
+}
+
+/// Calls a GPU puller method.
+void Puller::CallPullerMethod(const MethodCall& method_call) {
+ regs.reg_array[method_call.method] = method_call.argument;
+ const auto method = static_cast<BufferMethods>(method_call.method);
+
+ switch (method) {
+ case BufferMethods::BindObject: {
+ ProcessBindMethod(method_call);
+ break;
+ }
+ case BufferMethods::Nop:
+ case BufferMethods::SemaphoreAddressHigh:
+ case BufferMethods::SemaphoreAddressLow:
+ case BufferMethods::SemaphoreSequence:
+ case BufferMethods::UnkCacheFlush:
+ case BufferMethods::WrcacheFlush:
+ case BufferMethods::FenceValue:
+ break;
+ case BufferMethods::RefCnt:
+ rasterizer->SignalReference();
+ break;
+ case BufferMethods::FenceAction:
+ ProcessFenceActionMethod();
+ break;
+ case BufferMethods::WaitForInterrupt:
+ ProcessWaitForInterruptMethod();
+ break;
+ case BufferMethods::SemaphoreTrigger: {
+ ProcessSemaphoreTriggerMethod();
+ break;
+ }
+ case BufferMethods::NotifyIntr: {
+ // TODO(Kmather73): Research and implement this method.
+ LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
+ break;
+ }
+ case BufferMethods::Unk28: {
+ // TODO(Kmather73): Research and implement this method.
+ LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
+ break;
+ }
+ case BufferMethods::SemaphoreAcquire: {
+ ProcessSemaphoreAcquire();
+ break;
+ }
+ case BufferMethods::SemaphoreRelease: {
+ ProcessSemaphoreRelease();
+ break;
+ }
+ case BufferMethods::Yield: {
+ // TODO(Kmather73): Research and implement this method.
+ LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
+ break;
+ }
+ default:
+ LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
+ break;
+ }
+}
+
+/// Calls a GPU engine method.
+void Puller::CallEngineMethod(const MethodCall& method_call) {
+ const EngineID engine = bound_engines[method_call.subchannel];
+
+ switch (engine) {
+ case EngineID::FERMI_TWOD_A:
+ channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument,
+ method_call.IsLastCall());
+ break;
+ case EngineID::MAXWELL_B:
+ channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument,
+ method_call.IsLastCall());
+ break;
+ case EngineID::KEPLER_COMPUTE_B:
+ channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument,
+ method_call.IsLastCall());
+ break;
+ case EngineID::MAXWELL_DMA_COPY_A:
+ channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument,
+ method_call.IsLastCall());
+ break;
+ case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+ channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument,
+ method_call.IsLastCall());
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented engine");
+ }
+}
+
+/// Calls a GPU engine multivalue method.
+void Puller::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+ u32 methods_pending) {
+ const EngineID engine = bound_engines[subchannel];
+
+ switch (engine) {
+ case EngineID::FERMI_TWOD_A:
+ channel_state.fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
+ break;
+ case EngineID::MAXWELL_B:
+ channel_state.maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
+ break;
+ case EngineID::KEPLER_COMPUTE_B:
+ channel_state.kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
+ break;
+ case EngineID::MAXWELL_DMA_COPY_A:
+ channel_state.maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
+ break;
+ case EngineID::KEPLER_INLINE_TO_MEMORY_B:
+ channel_state.kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
+ break;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented engine");
+ }
+}
+
+/// Calls a GPU method.
+void Puller::CallMethod(const MethodCall& method_call) {
+ LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
+ method_call.subchannel);
+
+ ASSERT(method_call.subchannel < bound_engines.size());
+
+ if (ExecuteMethodOnEngine(method_call.method)) {
+ CallEngineMethod(method_call);
+ } else {
+ CallPullerMethod(method_call);
+ }
+}
+
+/// Calls a GPU multivalue method.
+void Puller::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+ u32 methods_pending) {
+ LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
+
+ ASSERT(subchannel < bound_engines.size());
+
+ if (ExecuteMethodOnEngine(method)) {
+ CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
+ } else {
+ for (std::size_t i = 0; i < amount; i++) {
+ CallPullerMethod(MethodCall{
+ method,
+ base_start[i],
+ subchannel,
+ methods_pending - static_cast<u32>(i),
+ });
+ }
+ }
+}
+
+void Puller::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
+ rasterizer = rasterizer_;
+}
+
+/// Determines where the method should be executed.
+[[nodiscard]] bool Puller::ExecuteMethodOnEngine(u32 method) {
+ const auto buffer_method = static_cast<BufferMethods>(method);
+ return buffer_method >= BufferMethods::NonPullerMethods;
+}
+
+} // namespace Tegra::Engines
diff --git a/src/video_core/engines/puller.h b/src/video_core/engines/puller.h
new file mode 100644
index 000000000..d948ec790
--- /dev/null
+++ b/src/video_core/engines/puller.h
@@ -0,0 +1,179 @@
+// Copyright 2021 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <cstddef>
+#include <vector>
+#include "common/bit_field.h"
+#include "common/common_funcs.h"
+#include "common/common_types.h"
+#include "video_core/engines/engine_interface.h"
+
+namespace Core {
+class System;
+}
+
+namespace Tegra {
+class MemoryManager;
+class DmaPusher;
+
+enum class EngineID {
+ FERMI_TWOD_A = 0x902D, // 2D Engine
+ MAXWELL_B = 0xB197, // 3D Engine
+ KEPLER_COMPUTE_B = 0xB1C0,
+ KEPLER_INLINE_TO_MEMORY_B = 0xA140,
+ MAXWELL_DMA_COPY_A = 0xB0B5,
+};
+
+namespace Control {
+struct ChannelState;
+}
+} // namespace Tegra
+
+namespace VideoCore {
+class RasterizerInterface;
+}
+
+namespace Tegra::Engines {
+
+class Puller final {
+public:
+ struct MethodCall {
+ u32 method{};
+ u32 argument{};
+ u32 subchannel{};
+ u32 method_count{};
+
+ explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
+ : method(method_), argument(argument_), subchannel(subchannel_),
+ method_count(method_count_) {}
+
+ [[nodiscard]] bool IsLastCall() const {
+ return method_count <= 1;
+ }
+ };
+
+ enum class FenceOperation : u32 {
+ Acquire = 0,
+ Increment = 1,
+ };
+
+ union FenceAction {
+ u32 raw;
+ BitField<0, 1, FenceOperation> op;
+ BitField<8, 24, u32> syncpoint_id;
+ };
+
+ explicit Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher,
+ Control::ChannelState& channel_state);
+ ~Puller();
+
+ void CallMethod(const MethodCall& method_call);
+
+ void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+ u32 methods_pending);
+
+ void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
+
+ void CallPullerMethod(const MethodCall& method_call);
+
+ void CallEngineMethod(const MethodCall& method_call);
+
+ void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+ u32 methods_pending);
+
+private:
+ Tegra::GPU& gpu;
+
+ MemoryManager& memory_manager;
+ DmaPusher& dma_pusher;
+ Control::ChannelState& channel_state;
+ VideoCore::RasterizerInterface* rasterizer = nullptr;
+
+ static constexpr std::size_t NUM_REGS = 0x800;
+ struct Regs {
+ static constexpr size_t NUM_REGS = 0x40;
+
+ union {
+ struct {
+ INSERT_PADDING_WORDS_NOINIT(0x4);
+ struct {
+ u32 address_high;
+ u32 address_low;
+
+ [[nodiscard]] GPUVAddr SemaphoreAddress() const {
+ return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+ address_low);
+ }
+ } semaphore_address;
+
+ u32 semaphore_sequence;
+ u32 semaphore_trigger;
+ INSERT_PADDING_WORDS_NOINIT(0xC);
+
+ // The pusher and the puller share the reference counter, the pusher only has read
+ // access
+ u32 reference_count;
+ INSERT_PADDING_WORDS_NOINIT(0x5);
+
+ u32 semaphore_acquire;
+ u32 semaphore_release;
+ u32 fence_value;
+ FenceAction fence_action;
+ INSERT_PADDING_WORDS_NOINIT(0xE2);
+
+ // Puller state
+ u32 acquire_mode;
+ u32 acquire_source;
+ u32 acquire_active;
+ u32 acquire_timeout;
+ u32 acquire_value;
+ };
+ std::array<u32, NUM_REGS> reg_array;
+ };
+ } regs{};
+
+ void ProcessBindMethod(const MethodCall& method_call);
+ void ProcessFenceActionMethod();
+ void ProcessSemaphoreAcquire();
+ void ProcessSemaphoreRelease();
+ void ProcessSemaphoreTriggerMethod();
+ void ProcessWaitForInterruptMethod();
+ [[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
+
+ /// Mapping of command subchannels to their bound engine ids
+ std::array<EngineID, 8> bound_engines{};
+
+ enum class GpuSemaphoreOperation {
+ AcquireEqual = 0x1,
+ WriteLong = 0x2,
+ AcquireGequal = 0x4,
+ AcquireMask = 0x8,
+ };
+
+#define ASSERT_REG_POSITION(field_name, position) \
+ static_assert(offsetof(Regs, field_name) == position * 4, \
+ "Field " #field_name " has invalid position")
+
+ ASSERT_REG_POSITION(semaphore_address, 0x4);
+ ASSERT_REG_POSITION(semaphore_sequence, 0x6);
+ ASSERT_REG_POSITION(semaphore_trigger, 0x7);
+ ASSERT_REG_POSITION(reference_count, 0x14);
+ ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
+ ASSERT_REG_POSITION(semaphore_release, 0x1B);
+ ASSERT_REG_POSITION(fence_value, 0x1C);
+ ASSERT_REG_POSITION(fence_action, 0x1D);
+
+ ASSERT_REG_POSITION(acquire_mode, 0x100);
+ ASSERT_REG_POSITION(acquire_source, 0x101);
+ ASSERT_REG_POSITION(acquire_active, 0x102);
+ ASSERT_REG_POSITION(acquire_timeout, 0x103);
+ ASSERT_REG_POSITION(acquire_value, 0x104);
+
+#undef ASSERT_REG_POSITION
+};
+
+} // namespace Tegra::Engines
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 1e9832ddd..d658e038d 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -4,12 +4,13 @@
#pragma once
#include <algorithm>
+#include <cstring>
+#include <memory>
#include <queue>
#include "common/common_types.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/gpu.h"
-#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
namespace VideoCommon {
@@ -19,10 +20,10 @@ public:
explicit FenceBase(u32 payload_, bool is_stubbed_)
: address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {}
- explicit FenceBase(GPUVAddr address_, u32 payload_, bool is_stubbed_)
+ explicit FenceBase(u8* address_, u32 payload_, bool is_stubbed_)
: address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {}
- GPUVAddr GetAddress() const {
+ u8* GetAddress() const {
return address;
}
@@ -35,7 +36,7 @@ public:
}
private:
- GPUVAddr address;
+ u8* address;
u32 payload;
bool is_semaphore;
@@ -57,7 +58,7 @@ public:
buffer_cache.AccumulateFlushes();
}
- void SignalSemaphore(GPUVAddr addr, u32 value) {
+ void SignalSemaphore(u8* addr, u32 value) {
TryReleasePendingFences();
const bool should_flush = ShouldFlush();
CommitAsyncFlushes();
@@ -91,8 +92,9 @@ public:
}
PopAsyncFlushes();
if (current_fence->IsSemaphore()) {
- gpu_memory.template Write<u32>(current_fence->GetAddress(),
- current_fence->GetPayload());
+ char* address = reinterpret_cast<char*>(current_fence->GetAddress());
+ auto payload = current_fence->GetPayload();
+ std::memcpy(address, &payload, sizeof(payload));
} else {
gpu.IncrementSyncPoint(current_fence->GetPayload());
}
@@ -104,8 +106,8 @@ protected:
explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
TQueryCache& query_cache_)
- : rasterizer{rasterizer_}, gpu{gpu_}, gpu_memory{gpu.MemoryManager()},
- texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
+ : rasterizer{rasterizer_}, gpu{gpu_}, texture_cache{texture_cache_},
+ buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
virtual ~FenceManager() = default;
@@ -113,7 +115,7 @@ protected:
/// true
virtual TFence CreateFence(u32 value, bool is_stubbed) = 0;
/// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true
- virtual TFence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) = 0;
+ virtual TFence CreateFence(u8* addr, u32 value, bool is_stubbed) = 0;
/// Queues a fence into the backend if the fence isn't stubbed.
virtual void QueueFence(TFence& fence) = 0;
/// Notifies that the backend fence has been signaled/reached in host GPU.
@@ -123,7 +125,6 @@ protected:
VideoCore::RasterizerInterface& rasterizer;
Tegra::GPU& gpu;
- Tegra::MemoryManager& gpu_memory;
TTextureCache& texture_cache;
TTBufferCache& buffer_cache;
TQueryCache& query_cache;
@@ -137,8 +138,9 @@ private:
}
PopAsyncFlushes();
if (current_fence->IsSemaphore()) {
- gpu_memory.template Write<u32>(current_fence->GetAddress(),
- current_fence->GetPayload());
+ char* address = reinterpret_cast<char*>(current_fence->GetAddress());
+ const auto payload = current_fence->GetPayload();
+ std::memcpy(address, &payload, sizeof(payload));
} else {
gpu.IncrementSyncPoint(current_fence->GetPayload());
}
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 33431f2a0..80a1c69e0 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -18,6 +18,8 @@
#include "core/hle/service/nvdrv/nvdata.h"
#include "core/perf_stats.h"
#include "video_core/cdma_pusher.h"
+#include "video_core/control/channel_state.h"
+#include "video_core/control/scheduler.h"
#include "video_core/dma_pusher.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
@@ -36,65 +38,58 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
struct GPU::Impl {
explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
- : gpu{gpu_}, system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(
- system)},
- dma_pusher{std::make_unique<Tegra::DmaPusher>(system, gpu)}, use_nvdec{use_nvdec_},
- maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
- fermi_2d{std::make_unique<Engines::Fermi2D>()},
- kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
- maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
- kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
+ : gpu{gpu_}, system{system_}, use_nvdec{use_nvdec_},
shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
- gpu_thread{system_, is_async_} {}
+ gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {}
~Impl() = default;
- /// Binds a renderer to the GPU.
- void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
- renderer = std::move(renderer_);
- rasterizer = renderer->ReadRasterizer();
-
- memory_manager->BindRasterizer(rasterizer);
- maxwell_3d->BindRasterizer(rasterizer);
- fermi_2d->BindRasterizer(rasterizer);
- kepler_compute->BindRasterizer(rasterizer);
- kepler_memory->BindRasterizer(rasterizer);
- maxwell_dma->BindRasterizer(rasterizer);
+ std::shared_ptr<Control::ChannelState> CreateChannel(s32 channel_id) {
+ auto channel_state = std::make_shared<Tegra::Control::ChannelState>(channel_id);
+ channels.emplace(channel_id, channel_state);
+ scheduler->DeclareChannel(channel_state);
+ return channel_state;
}
- /// Calls a GPU method.
- void CallMethod(const GPU::MethodCall& method_call) {
- LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
- method_call.subchannel);
+ void BindChannel(s32 channel_id) {
+ if (bound_channel == channel_id) {
+ return;
+ }
+ auto it = channels.find(channel_id);
+ ASSERT(it != channels.end());
+ bound_channel = channel_id;
+ current_channel = it->second.get();
- ASSERT(method_call.subchannel < bound_engines.size());
+ rasterizer->BindChannel(*current_channel);
+ }
- if (ExecuteMethodOnEngine(method_call.method)) {
- CallEngineMethod(method_call);
- } else {
- CallPullerMethod(method_call);
- }
+ std::shared_ptr<Control::ChannelState> AllocateChannel() {
+ return CreateChannel(new_channel_id++);
}
- /// Calls a GPU multivalue method.
- void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
- u32 methods_pending) {
- LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
+ void InitChannel(Control::ChannelState& to_init) {
+ to_init.Init(system, gpu);
+ to_init.BindRasterizer(rasterizer);
+ rasterizer->InitializeChannel(to_init);
+ }
- ASSERT(subchannel < bound_engines.size());
+ void ReleaseChannel(Control::ChannelState& to_release) {
+ UNIMPLEMENTED();
+ }
- if (ExecuteMethodOnEngine(method)) {
- CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
- } else {
- for (std::size_t i = 0; i < amount; i++) {
- CallPullerMethod(GPU::MethodCall{
- method,
- base_start[i],
- subchannel,
- methods_pending - static_cast<u32>(i),
- });
- }
+ void CreateHost1xChannel() {
+ if (host1x_channel) {
+ return;
}
+ host1x_channel = CreateChannel(0);
+ host1x_channel->memory_manager = std::make_shared<Tegra::MemoryManager>(system);
+ InitChannel(*host1x_channel);
+ }
+
+ /// Binds a renderer to the GPU.
+ void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
+ renderer = std::move(renderer_);
+ rasterizer = renderer->ReadRasterizer();
}
/// Flush all current written commands into the host GPU for execution.
@@ -146,42 +141,44 @@ struct GPU::Impl {
/// Returns a reference to the Maxwell3D GPU engine.
[[nodiscard]] Engines::Maxwell3D& Maxwell3D() {
- return *maxwell_3d;
+ ASSERT(current_channel);
+ return *current_channel->maxwell_3d;
}
/// Returns a const reference to the Maxwell3D GPU engine.
[[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const {
- return *maxwell_3d;
+ ASSERT(current_channel);
+ return *current_channel->maxwell_3d;
}
/// Returns a reference to the KeplerCompute GPU engine.
[[nodiscard]] Engines::KeplerCompute& KeplerCompute() {
- return *kepler_compute;
+ ASSERT(current_channel);
+ return *current_channel->kepler_compute;
}
/// Returns a reference to the KeplerCompute GPU engine.
[[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const {
- return *kepler_compute;
+ ASSERT(current_channel);
+ return *current_channel->kepler_compute;
}
/// Returns a reference to the GPU memory manager.
[[nodiscard]] Tegra::MemoryManager& MemoryManager() {
- return *memory_manager;
- }
-
- /// Returns a const reference to the GPU memory manager.
- [[nodiscard]] const Tegra::MemoryManager& MemoryManager() const {
- return *memory_manager;
+ CreateHost1xChannel();
+ return *host1x_channel->memory_manager;
}
/// Returns a reference to the GPU DMA pusher.
[[nodiscard]] Tegra::DmaPusher& DmaPusher() {
- return *dma_pusher;
+ ASSERT(current_channel);
+ return *current_channel->dma_pusher;
}
/// Returns a const reference to the GPU DMA pusher.
[[nodiscard]] const Tegra::DmaPusher& DmaPusher() const {
- return *dma_pusher;
+ ASSERT(current_channel);
+ return *current_channel->dma_pusher;
}
/// Returns a reference to the underlying renderer.
@@ -306,7 +303,7 @@ struct GPU::Impl {
/// This can be used to launch any necessary threads and register any necessary
/// core timing events.
void Start() {
- gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher);
+ gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler);
cpu_context = renderer->GetRenderWindow().CreateSharedContext();
cpu_context->MakeCurrent();
}
@@ -328,8 +325,8 @@ struct GPU::Impl {
}
/// Push GPU command entries to be processed
- void PushGPUEntries(Tegra::CommandList&& entries) {
- gpu_thread.SubmitList(std::move(entries));
+ void PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
+ gpu_thread.SubmitList(channel, std::move(entries));
}
/// Push GPU command buffer entries to be processed
@@ -381,303 +378,16 @@ struct GPU::Impl {
interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
}
- void ProcessBindMethod(const GPU::MethodCall& method_call) {
- // Bind the current subchannel to the desired engine id.
- LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
- method_call.argument);
- const auto engine_id = static_cast<EngineID>(method_call.argument);
- bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
- switch (engine_id) {
- case EngineID::FERMI_TWOD_A:
- dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
- break;
- case EngineID::MAXWELL_B:
- dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
- break;
- case EngineID::KEPLER_COMPUTE_B:
- dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
- break;
- case EngineID::MAXWELL_DMA_COPY_A:
- dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
- break;
- case EngineID::KEPLER_INLINE_TO_MEMORY_B:
- dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
- }
- }
-
- void ProcessFenceActionMethod() {
- switch (regs.fence_action.op) {
- case GPU::FenceOperation::Acquire:
- WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
- break;
- case GPU::FenceOperation::Increment:
- IncrementSyncPoint(regs.fence_action.syncpoint_id);
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
- }
- }
-
- void ProcessWaitForInterruptMethod() {
- // TODO(bunnei) ImplementMe
- LOG_WARNING(HW_GPU, "(STUBBED) called");
- }
-
- void ProcessSemaphoreTriggerMethod() {
- const auto semaphoreOperationMask = 0xF;
- const auto op =
- static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
- if (op == GpuSemaphoreOperation::WriteLong) {
- struct Block {
- u32 sequence;
- u32 zeros = 0;
- u64 timestamp;
- };
-
- Block block{};
- block.sequence = regs.semaphore_sequence;
- // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
- // CoreTiming
- block.timestamp = GetTicks();
- memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
- sizeof(block));
- } else {
- const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
- if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
- (op == GpuSemaphoreOperation::AcquireGequal &&
- static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
- (op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
- // Nothing to do in this case
- } else {
- regs.acquire_source = true;
- regs.acquire_value = regs.semaphore_sequence;
- if (op == GpuSemaphoreOperation::AcquireEqual) {
- regs.acquire_active = true;
- regs.acquire_mode = false;
- } else if (op == GpuSemaphoreOperation::AcquireGequal) {
- regs.acquire_active = true;
- regs.acquire_mode = true;
- } else if (op == GpuSemaphoreOperation::AcquireMask) {
- // TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
- // semaphore_sequence, gives a non-0 result
- LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
- } else {
- LOG_ERROR(HW_GPU, "Invalid semaphore operation");
- }
- }
- }
- }
-
- void ProcessSemaphoreRelease() {
- memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(),
- regs.semaphore_release);
- }
-
- void ProcessSemaphoreAcquire() {
- const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress());
- const auto value = regs.semaphore_acquire;
- if (word != value) {
- regs.acquire_active = true;
- regs.acquire_value = value;
- // TODO(kemathe73) figure out how to do the acquire_timeout
- regs.acquire_mode = false;
- regs.acquire_source = false;
- }
- }
-
- /// Calls a GPU puller method.
- void CallPullerMethod(const GPU::MethodCall& method_call) {
- regs.reg_array[method_call.method] = method_call.argument;
- const auto method = static_cast<BufferMethods>(method_call.method);
-
- switch (method) {
- case BufferMethods::BindObject: {
- ProcessBindMethod(method_call);
- break;
- }
- case BufferMethods::Nop:
- case BufferMethods::SemaphoreAddressHigh:
- case BufferMethods::SemaphoreAddressLow:
- case BufferMethods::SemaphoreSequence:
- break;
- case BufferMethods::UnkCacheFlush:
- rasterizer->SyncGuestHost();
- break;
- case BufferMethods::WrcacheFlush:
- rasterizer->SignalReference();
- break;
- case BufferMethods::FenceValue:
- break;
- case BufferMethods::RefCnt:
- rasterizer->SignalReference();
- break;
- case BufferMethods::FenceAction:
- ProcessFenceActionMethod();
- break;
- case BufferMethods::WaitForInterrupt:
- rasterizer->WaitForIdle();
- break;
- case BufferMethods::SemaphoreTrigger: {
- ProcessSemaphoreTriggerMethod();
- break;
- }
- case BufferMethods::NotifyIntr: {
- // TODO(Kmather73): Research and implement this method.
- LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
- break;
- }
- case BufferMethods::Unk28: {
- // TODO(Kmather73): Research and implement this method.
- LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
- break;
- }
- case BufferMethods::SemaphoreAcquire: {
- ProcessSemaphoreAcquire();
- break;
- }
- case BufferMethods::SemaphoreRelease: {
- ProcessSemaphoreRelease();
- break;
- }
- case BufferMethods::Yield: {
- // TODO(Kmather73): Research and implement this method.
- LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
- break;
- }
- default:
- LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
- break;
- }
- }
-
- /// Calls a GPU engine method.
- void CallEngineMethod(const GPU::MethodCall& method_call) {
- const EngineID engine = bound_engines[method_call.subchannel];
-
- switch (engine) {
- case EngineID::FERMI_TWOD_A:
- fermi_2d->CallMethod(method_call.method, method_call.argument,
- method_call.IsLastCall());
- break;
- case EngineID::MAXWELL_B:
- maxwell_3d->CallMethod(method_call.method, method_call.argument,
- method_call.IsLastCall());
- break;
- case EngineID::KEPLER_COMPUTE_B:
- kepler_compute->CallMethod(method_call.method, method_call.argument,
- method_call.IsLastCall());
- break;
- case EngineID::MAXWELL_DMA_COPY_A:
- maxwell_dma->CallMethod(method_call.method, method_call.argument,
- method_call.IsLastCall());
- break;
- case EngineID::KEPLER_INLINE_TO_MEMORY_B:
- kepler_memory->CallMethod(method_call.method, method_call.argument,
- method_call.IsLastCall());
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented engine");
- }
- }
-
- /// Calls a GPU engine multivalue method.
- void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
- u32 methods_pending) {
- const EngineID engine = bound_engines[subchannel];
-
- switch (engine) {
- case EngineID::FERMI_TWOD_A:
- fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
- break;
- case EngineID::MAXWELL_B:
- maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
- break;
- case EngineID::KEPLER_COMPUTE_B:
- kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
- break;
- case EngineID::MAXWELL_DMA_COPY_A:
- maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
- break;
- case EngineID::KEPLER_INLINE_TO_MEMORY_B:
- kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
- break;
- default:
- UNIMPLEMENTED_MSG("Unimplemented engine");
- }
- }
-
- /// Determines where the method should be executed.
- [[nodiscard]] bool ExecuteMethodOnEngine(u32 method) {
- const auto buffer_method = static_cast<BufferMethods>(method);
- return buffer_method >= BufferMethods::NonPullerMethods;
- }
-
- struct Regs {
- static constexpr size_t NUM_REGS = 0x40;
-
- union {
- struct {
- INSERT_PADDING_WORDS_NOINIT(0x4);
- struct {
- u32 address_high;
- u32 address_low;
-
- [[nodiscard]] GPUVAddr SemaphoreAddress() const {
- return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
- address_low);
- }
- } semaphore_address;
-
- u32 semaphore_sequence;
- u32 semaphore_trigger;
- INSERT_PADDING_WORDS_NOINIT(0xC);
-
- // The pusher and the puller share the reference counter, the pusher only has read
- // access
- u32 reference_count;
- INSERT_PADDING_WORDS_NOINIT(0x5);
-
- u32 semaphore_acquire;
- u32 semaphore_release;
- u32 fence_value;
- GPU::FenceAction fence_action;
- INSERT_PADDING_WORDS_NOINIT(0xE2);
-
- // Puller state
- u32 acquire_mode;
- u32 acquire_source;
- u32 acquire_active;
- u32 acquire_timeout;
- u32 acquire_value;
- };
- std::array<u32, NUM_REGS> reg_array;
- };
- } regs{};
-
GPU& gpu;
Core::System& system;
- std::unique_ptr<Tegra::MemoryManager> memory_manager;
- std::unique_ptr<Tegra::DmaPusher> dma_pusher;
+
std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
std::unique_ptr<VideoCore::RendererBase> renderer;
VideoCore::RasterizerInterface* rasterizer = nullptr;
const bool use_nvdec;
- /// Mapping of command subchannels to their bound engine ids
- std::array<EngineID, 8> bound_engines{};
- /// 3D engine
- std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
- /// 2D engine
- std::unique_ptr<Engines::Fermi2D> fermi_2d;
- /// Compute engine
- std::unique_ptr<Engines::KeplerCompute> kepler_compute;
- /// DMA engine
- std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
- /// Inline memory engine
- std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+ std::shared_ptr<Control::ChannelState> host1x_channel;
+ s32 new_channel_id{1};
/// Shader build notifier
std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
/// When true, we are about to shut down emulation session, so terminate outstanding tasks
@@ -710,33 +420,10 @@ struct GPU::Impl {
VideoCommon::GPUThread::ThreadManager gpu_thread;
std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
-#define ASSERT_REG_POSITION(field_name, position) \
- static_assert(offsetof(Regs, field_name) == position * 4, \
- "Field " #field_name " has invalid position")
-
- ASSERT_REG_POSITION(semaphore_address, 0x4);
- ASSERT_REG_POSITION(semaphore_sequence, 0x6);
- ASSERT_REG_POSITION(semaphore_trigger, 0x7);
- ASSERT_REG_POSITION(reference_count, 0x14);
- ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
- ASSERT_REG_POSITION(semaphore_release, 0x1B);
- ASSERT_REG_POSITION(fence_value, 0x1C);
- ASSERT_REG_POSITION(fence_action, 0x1D);
-
- ASSERT_REG_POSITION(acquire_mode, 0x100);
- ASSERT_REG_POSITION(acquire_source, 0x101);
- ASSERT_REG_POSITION(acquire_active, 0x102);
- ASSERT_REG_POSITION(acquire_timeout, 0x103);
- ASSERT_REG_POSITION(acquire_value, 0x104);
-
-#undef ASSERT_REG_POSITION
-
- enum class GpuSemaphoreOperation {
- AcquireEqual = 0x1,
- WriteLong = 0x2,
- AcquireGequal = 0x4,
- AcquireMask = 0x8,
- };
+ std::unique_ptr<Tegra::Control::Scheduler> scheduler;
+ std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
+ Tegra::Control::ChannelState* current_channel;
+ s32 bound_channel{-1};
};
GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
@@ -744,17 +431,24 @@ GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
GPU::~GPU() = default;
-void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) {
- impl->BindRenderer(std::move(renderer));
+std::shared_ptr<Control::ChannelState> GPU::AllocateChannel() {
+ return impl->AllocateChannel();
+}
+
+void GPU::InitChannel(Control::ChannelState& to_init) {
+ impl->InitChannel(to_init);
}
-void GPU::CallMethod(const MethodCall& method_call) {
- impl->CallMethod(method_call);
+void GPU::BindChannel(s32 channel_id) {
+ impl->BindChannel(channel_id);
}
-void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
- u32 methods_pending) {
- impl->CallMultiMethod(method, subchannel, base_start, amount, methods_pending);
+void GPU::ReleaseChannel(Control::ChannelState& to_release) {
+ impl->ReleaseChannel(to_release);
+}
+
+void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) {
+ impl->BindRenderer(std::move(renderer));
}
void GPU::FlushCommands() {
@@ -881,8 +575,8 @@ void GPU::ReleaseContext() {
impl->ReleaseContext();
}
-void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
- impl->PushGPUEntries(std::move(entries));
+void GPU::PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
+ impl->PushGPUEntries(channel, std::move(entries));
}
void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 42c91954f..74d55e074 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -89,57 +89,20 @@ class Maxwell3D;
class KeplerCompute;
} // namespace Engines
-enum class EngineID {
- FERMI_TWOD_A = 0x902D, // 2D Engine
- MAXWELL_B = 0xB197, // 3D Engine
- KEPLER_COMPUTE_B = 0xB1C0,
- KEPLER_INLINE_TO_MEMORY_B = 0xA140,
- MAXWELL_DMA_COPY_A = 0xB0B5,
-};
+namespace Control {
+struct ChannelState;
+}
class MemoryManager;
class GPU final {
public:
- struct MethodCall {
- u32 method{};
- u32 argument{};
- u32 subchannel{};
- u32 method_count{};
-
- explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
- : method(method_), argument(argument_), subchannel(subchannel_),
- method_count(method_count_) {}
-
- [[nodiscard]] bool IsLastCall() const {
- return method_count <= 1;
- }
- };
-
- enum class FenceOperation : u32 {
- Acquire = 0,
- Increment = 1,
- };
-
- union FenceAction {
- u32 raw;
- BitField<0, 1, FenceOperation> op;
- BitField<8, 24, u32> syncpoint_id;
- };
-
explicit GPU(Core::System& system, bool is_async, bool use_nvdec);
~GPU();
/// Binds a renderer to the GPU.
void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
- /// Calls a GPU method.
- void CallMethod(const MethodCall& method_call);
-
- /// Calls a GPU multivalue method.
- void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
- u32 methods_pending);
-
/// Flush all current written commands into the host GPU for execution.
void FlushCommands();
/// Synchronizes CPU writes with Host GPU memory.
@@ -147,6 +110,14 @@ public:
/// Signal the ending of command list.
void OnCommandListEnd();
+ std::shared_ptr<Control::ChannelState> AllocateChannel();
+
+ void InitChannel(Control::ChannelState& to_init);
+
+ void BindChannel(s32 channel_id);
+
+ void ReleaseChannel(Control::ChannelState& to_release);
+
/// Request a host GPU memory flush from the CPU.
[[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
@@ -226,7 +197,7 @@ public:
void ReleaseContext();
/// Push GPU command entries to be processed
- void PushGPUEntries(Tegra::CommandList&& entries);
+ void PushGPUEntries(s32 channel, Tegra::CommandList&& entries);
/// Push GPU command buffer entries to be processed
void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries);
@@ -248,7 +219,7 @@ public:
private:
struct Impl;
- std::unique_ptr<Impl> impl;
+ mutable std::unique_ptr<Impl> impl;
};
} // namespace Tegra
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index f0e48cfbd..9844cde43 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -8,6 +8,7 @@
#include "common/thread.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
+#include "video_core/control/scheduler.h"
#include "video_core/dma_pusher.h"
#include "video_core/gpu.h"
#include "video_core/gpu_thread.h"
@@ -18,7 +19,7 @@ namespace VideoCommon::GPUThread {
/// Runs the GPU thread
static void RunThread(std::stop_token stop_token, Core::System& system,
VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
- Tegra::DmaPusher& dma_pusher, SynchState& state) {
+ Tegra::Control::Scheduler& scheduler, SynchState& state) {
std::string name = "GPU";
MicroProfileOnThreadCreate(name.c_str());
SCOPE_EXIT({ MicroProfileOnThreadExit(); });
@@ -36,8 +37,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
break;
}
if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) {
- dma_pusher.Push(std::move(submit_list->entries));
- dma_pusher.DispatchCalls();
+ scheduler.Push(submit_list->channel, std::move(submit_list->entries));
} else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) {
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
} else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
@@ -68,14 +68,14 @@ ThreadManager::~ThreadManager() = default;
void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
Core::Frontend::GraphicsContext& context,
- Tegra::DmaPusher& dma_pusher) {
+ Tegra::Control::Scheduler& scheduler) {
rasterizer = renderer.ReadRasterizer();
thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
- std::ref(dma_pusher), std::ref(state));
+ std::ref(scheduler), std::ref(state));
}
-void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
- PushCommand(SubmitListCommand(std::move(entries)));
+void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) {
+ PushCommand(SubmitListCommand(channel, std::move(entries)));
}
void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 2f8210cb9..c5078a2b3 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -15,7 +15,9 @@
namespace Tegra {
struct FramebufferConfig;
-class DmaPusher;
+namespace Control {
+class Scheduler;
+}
} // namespace Tegra
namespace Core {
@@ -34,8 +36,10 @@ namespace VideoCommon::GPUThread {
/// Command to signal to the GPU thread that a command list is ready for processing
struct SubmitListCommand final {
- explicit SubmitListCommand(Tegra::CommandList&& entries_) : entries{std::move(entries_)} {}
+ explicit SubmitListCommand(s32 channel_, Tegra::CommandList&& entries_)
+ : channel{channel_}, entries{std::move(entries_)} {}
+ s32 channel;
Tegra::CommandList entries;
};
@@ -112,10 +116,10 @@ public:
/// Creates and starts the GPU thread.
void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
- Tegra::DmaPusher& dma_pusher);
+ Tegra::Control::Scheduler& scheduler);
/// Push GPU command entries to be processed
- void SubmitList(Tegra::CommandList&& entries);
+ void SubmitList(s32 channel, Tegra::CommandList&& entries);
/// Swap buffers (render frame)
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index bf9eb735d..a3efd365e 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -133,11 +133,6 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
// TryLockPage(page_entry, size);
auto& current_page = page_table[PageEntryIndex(gpu_addr)];
- if ((!current_page.IsValid() && page_entry.IsValid()) ||
- current_page.ToAddress() != page_entry.ToAddress()) {
- rasterizer->ModifyGPUMemory(gpu_addr, size);
- }
-
current_page = page_entry;
}
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 889b606b3..eb68ea638 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -17,6 +17,7 @@
#include "common/assert.h"
#include "common/settings.h"
+#include "video_core/control/channel_state_cache.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
@@ -90,13 +91,10 @@ private:
};
template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
-class QueryCacheBase {
+class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
public:
- explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::MemoryManager& gpu_memory_)
- : rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
- gpu_memory{gpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
+ explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_)
+ : rasterizer{rasterizer_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
VideoCore::QueryType::SamplesPassed}}} {}
void InvalidateRegion(VAddr addr, std::size_t size) {
@@ -117,13 +115,13 @@ public:
*/
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
std::unique_lock lock{mutex};
- const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
CachedQuery* query = TryGet(*cpu_addr);
if (!query) {
ASSERT_OR_EXECUTE(cpu_addr, return;);
- u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
+ u8* const host_ptr = gpu_memory->GetPointer(gpu_addr);
query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
}
@@ -137,7 +135,7 @@ public:
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
void UpdateCounters() {
std::unique_lock lock{mutex};
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
}
@@ -264,8 +262,6 @@ private:
static constexpr unsigned YUZU_PAGEBITS = 12;
VideoCore::RasterizerInterface& rasterizer;
- Tegra::Engines::Maxwell3D& maxwell3d;
- Tegra::MemoryManager& gpu_memory;
std::recursive_mutex mutex;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index a04a76481..8dacb2626 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -16,6 +16,9 @@ class MemoryManager;
namespace Engines {
class AccelerateDMAInterface;
}
+namespace Control {
+struct ChannelState;
+}
} // namespace Tegra
namespace VideoCore {
@@ -137,5 +140,11 @@ public:
/// Initialize disk cached resources for the game being emulated
virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const DiskResourceLoadCallback& callback) {}
+
+ virtual void InitializeChannel(Tegra::Control::ChannelState& channel) {}
+
+ virtual void BindChannel(Tegra::Control::ChannelState& channel) {}
+
+ virtual void ReleaseChannel(s32 channel_id) {}
};
} // namespace VideoCore
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 6e82c2e28..c76446b60 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -12,7 +12,7 @@ namespace OpenGL {
GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {}
-GLInnerFence::GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_)
+GLInnerFence::GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_)
: FenceBase{address_, payload_, is_stubbed_} {}
GLInnerFence::~GLInnerFence() = default;
@@ -52,7 +52,7 @@ Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<GLInnerFence>(value, is_stubbed);
}
-Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
+Fence FenceManagerOpenGL::CreateFence(u8* addr, u32 value, bool is_stubbed) {
return std::make_shared<GLInnerFence>(addr, value, is_stubbed);
}
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index 14ff00db2..fced8d002 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -17,7 +17,7 @@ namespace OpenGL {
class GLInnerFence : public VideoCommon::FenceBase {
public:
explicit GLInnerFence(u32 payload_, bool is_stubbed_);
- explicit GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_);
+ explicit GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_);
~GLInnerFence();
void Queue();
@@ -41,7 +41,7 @@ public:
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
- Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
+ Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override;
void QueueFence(Fence& fence) override;
bool IsFenceSignaled(Fence& fence) const override;
void WaitFence(Fence& fence) override;
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index ed40f5791..5070db441 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -26,9 +26,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
} // Anonymous namespace
-QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::MemoryManager& gpu_memory_)
- : QueryCacheBase(rasterizer_, maxwell3d_, gpu_memory_), gl_rasterizer{rasterizer_} {}
+QueryCache::QueryCache(RasterizerOpenGL& rasterizer_)
+ : QueryCacheBase(rasterizer_), gl_rasterizer{rasterizer_} {}
QueryCache::~QueryCache() = default;
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index 8a49f1ef0..14ce59990 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -28,8 +28,7 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
class QueryCache final
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
- explicit QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::MemoryManager& gpu_memory_);
+ explicit QueryCache(RasterizerOpenGL& rasterizer_);
~QueryCache();
OGLQuery AllocateQuery(VideoCore::QueryType type);
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index a0d048b0b..e8d61bd41 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -60,12 +60,11 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
texture_cache_runtime(device, program_manager, state_tracker),
- texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
- buffer_cache_runtime(device),
- buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
- shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache,
- buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()),
- query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
+ texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device),
+ buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
+ shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
+ state_tracker, gpu.ShaderNotify()),
+ query_cache(*this), accelerate_dma(buffer_cache),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
RasterizerOpenGL::~RasterizerOpenGL() = default;
@@ -392,7 +391,8 @@ void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
gpu_memory.Write<u32>(addr, value);
return;
}
- fence_manager.SignalSemaphore(addr, value);
+ auto paddr = gpu_memory.GetPointer(addr);
+ fence_manager.SignalSemaphore(paddr, value);
}
void RasterizerOpenGL::SignalSyncPoint(u32 value) {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 0b8d8ec92..494581d0d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -151,16 +151,13 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs
} // Anonymous namespace
ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_, const Device& device_,
- TextureCache& texture_cache_, BufferCache& buffer_cache_,
- ProgramManager& program_manager_, StateTracker& state_tracker_,
- VideoCore::ShaderNotify& shader_notify_)
- : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
- emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_},
- buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_},
- shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()},
+ const Device& device_, TextureCache& texture_cache_,
+ BufferCache& buffer_cache_, ProgramManager& program_manager_,
+ StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_)
+ : VideoCommon::ShaderCache{rasterizer_}, emu_window{emu_window_}, device{device_},
+ texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_},
+ state_tracker{state_tracker_}, shader_notify{shader_notify_},
+ use_asynchronous_shaders{device.UseAsynchronousShaders()},
profile{
.supported_spirv = 0x00010000,
@@ -310,7 +307,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {
current_pipeline = nullptr;
return nullptr;
}
- const auto& regs{maxwell3d.regs};
+ const auto& regs{maxwell3d->regs};
graphics_key.raw = 0;
graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0
@@ -351,13 +348,13 @@ GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const n
}
// If something is using depth, we can assume that games are not rendering anything which
// will be used one time.
- if (maxwell3d.regs.zeta_enable) {
+ if (maxwell3d->regs.zeta_enable) {
return nullptr;
}
// If games are using a small index count, we can assume these are full screen quads.
// Usually these shaders are only used once for building textures so we can assume they
// can't be built async
- if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
+ if (maxwell3d->regs.index_array.count <= 6 || maxwell3d->regs.vertex_buffer.count <= 6) {
return pipeline;
}
return nullptr;
@@ -368,7 +365,7 @@ ComputePipeline* ShaderCache::CurrentComputePipeline() {
if (!shader) {
return nullptr;
}
- const auto& qmd{kepler_compute.launch_description};
+ const auto& qmd{kepler_compute->launch_description};
const ComputePipelineKey key{
.unique_hash = shader->unique_hash,
.shared_memory_size = qmd.shared_alloc,
@@ -481,8 +478,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
}
auto* const thread_worker{build_in_parallel ? workers.get() : nullptr};
return std::make_unique<GraphicsPipeline>(
- device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker,
- thread_worker, &shader_notify, sources, sources_spirv, infos, key);
+ device, texture_cache, buffer_cache, *gpu_memory, *maxwell3d, program_manager,
+ state_tracker, thread_worker, &shader_notify, sources, sources_spirv, infos, key);
} catch (Shader::Exception& exception) {
LOG_ERROR(Render_OpenGL, "{}", exception.what());
@@ -491,9 +488,9 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) {
- const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
- const auto& qmd{kepler_compute.launch_description};
- ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+ const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
+ const auto& qmd{kepler_compute->launch_description};
+ ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
env.SetCachedSize(shader->size_bytes);
main_pools.ReleaseContents();
@@ -536,8 +533,8 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
break;
}
- return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory,
- kepler_compute, program_manager, program.info, code,
+ return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, *gpu_memory,
+ *kepler_compute, program_manager, program.info, code,
code_spirv);
} catch (Shader::Exception& exception) {
LOG_ERROR(Render_OpenGL, "{}", exception.what());
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index a14269dea..89f181fe3 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -30,12 +30,9 @@ using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
class ShaderCache : public VideoCommon::ShaderCache {
public:
explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_, const Device& device_,
- TextureCache& texture_cache_, BufferCache& buffer_cache_,
- ProgramManager& program_manager_, StateTracker& state_tracker_,
- VideoCore::ShaderNotify& shader_notify_);
+ const Device& device_, TextureCache& texture_cache_,
+ BufferCache& buffer_cache_, ProgramManager& program_manager_,
+ StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_);
~ShaderCache();
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 7c78d0299..68c2bc34c 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -95,20 +95,25 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
Core::Frontend::EmuWindow& emu_window,
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
- : RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_),
- cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()),
+ : RendererBase(emu_window, std::move(context_)),
+ telemetry_session(telemetry_session_),
+ cpu_memory(cpu_memory_),
+ gpu(gpu_),
+ library(OpenLibrary()),
instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
true, Settings::values.renderer_debug.GetValue())),
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
surface(CreateSurface(instance, render_window)),
- device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false),
- state_tracker(gpu), scheduler(device, state_tracker),
+ device(CreateDevice(instance, dld, *surface)),
+ memory_allocator(device, false),
+ state_tracker(gpu),
+ scheduler(device, state_tracker),
swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
render_window.GetFramebufferLayout().height, false),
blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
screen_info),
- rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device,
- memory_allocator, state_tracker, scheduler) {
+ rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
+ state_tracker, scheduler) {
Report();
} catch (const vk::Exception& exception) {
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index c249b34d4..301cbbabe 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -14,7 +14,7 @@ namespace Vulkan {
InnerFence::InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_)
: FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {}
-InnerFence::InnerFence(Scheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_)
+InnerFence::InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_)
: FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {}
InnerFence::~InnerFence() = default;
@@ -52,7 +52,7 @@ Fence FenceManager::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<InnerFence>(scheduler, value, is_stubbed);
}
-Fence FenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
+Fence FenceManager::CreateFence(u8* addr, u32 value, bool is_stubbed) {
return std::make_shared<InnerFence>(scheduler, addr, value, is_stubbed);
}
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 7c0bbd80a..ea9e88052 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -26,7 +26,7 @@ class Scheduler;
class InnerFence : public VideoCommon::FenceBase {
public:
explicit InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_);
- explicit InnerFence(Scheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_);
+ explicit InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_);
~InnerFence();
void Queue();
@@ -51,7 +51,7 @@ public:
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
- Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
+ Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override;
void QueueFence(Fence& fence) override;
bool IsFenceSignaled(Fence& fence) const override;
void WaitFence(Fence& fence) override;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index accbfc8e1..b1e0b96c4 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -259,17 +259,15 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c
return std::memcmp(&rhs, this, Size()) == 0;
}
-PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_, const Device& device_,
+PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device_,
Scheduler& scheduler_, DescriptorPool& descriptor_pool_,
UpdateDescriptorQueue& update_descriptor_queue_,
RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_)
- : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
- device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
- update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
- buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_},
+ : VideoCommon::ShaderCache{rasterizer_}, device{device_}, scheduler{scheduler_},
+ descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_},
+ render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_},
+ texture_cache{texture_cache_}, shader_notify{shader_notify_},
use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"),
serialization_thread(1, "VkPipelineSerialization") {
@@ -337,7 +335,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
current_pipeline = nullptr;
return nullptr;
}
- graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(),
+ graphics_key.state.Refresh(*maxwell3d, device.IsExtExtendedDynamicStateSupported(),
device.IsExtVertexInputDynamicStateSupported());
if (current_pipeline) {
@@ -357,7 +355,7 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
if (!shader) {
return nullptr;
}
- const auto& qmd{kepler_compute.launch_description};
+ const auto& qmd{kepler_compute->launch_description};
const ComputePipelineCacheKey key{
.unique_hash = shader->unique_hash,
.shared_memory_size = qmd.shared_alloc,
@@ -486,13 +484,13 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const
}
// If something is using depth, we can assume that games are not rendering anything which
// will be used one time.
- if (maxwell3d.regs.zeta_enable) {
+ if (maxwell3d->regs.zeta_enable) {
return nullptr;
}
// If games are using a small index count, we can assume these are full screen quads.
// Usually these shaders are only used once for building textures so we can assume they
// can't be built async
- if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
+ if (maxwell3d->regs.index_array.count <= 6 || maxwell3d->regs.vertex_buffer.count <= 6) {
return pipeline;
}
return nullptr;
@@ -558,7 +556,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
}
Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
return std::make_unique<GraphicsPipeline>(
- maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device,
+ *maxwell3d, *gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device,
descriptor_pool, update_descriptor_queue, thread_worker, statistics, render_pass_cache, key,
std::move(modules), infos);
@@ -592,9 +590,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
const ComputePipelineCacheKey& key, const ShaderInfo* shader) {
- const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
- const auto& qmd{kepler_compute.launch_description};
- ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+ const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
+ const auto& qmd{kepler_compute->launch_description};
+ ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
env.SetCachedSize(shader->size_bytes);
main_pools.ReleaseContents();
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 127957dbf..61f9e9366 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -100,10 +100,8 @@ struct ShaderPools {
class PipelineCache : public VideoCommon::ShaderCache {
public:
- explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
- Tegra::Engines::KeplerCompute& kepler_compute,
- Tegra::MemoryManager& gpu_memory, const Device& device,
- Scheduler& scheduler, DescriptorPool& descriptor_pool,
+ explicit PipelineCache(RasterizerVulkan& rasterizer, const Device& device, Scheduler& scheduler,
+ DescriptorPool& descriptor_pool,
UpdateDescriptorQueue& update_descriptor_queue,
RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_);
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 2b859c6b8..393bbdf37 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -65,10 +65,9 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
}
-QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
- const Device& device_, Scheduler& scheduler_)
- : QueryCacheBase{rasterizer_, maxwell3d_, gpu_memory_}, device{device_}, scheduler{scheduler_},
+QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
+ Scheduler& scheduler_)
+ : QueryCacheBase{rasterizer_}, device{device_}, scheduler{scheduler_},
query_pools{
QueryPool{device_, scheduler_, QueryType::SamplesPassed},
} {}
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index b0d86c4f8..26762ee09 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -52,9 +52,8 @@ private:
class QueryCache final
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
- explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
- const Device& device_, Scheduler& scheduler_);
+ explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
+ Scheduler& scheduler_);
~QueryCache();
std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 7e40c2df1..5d9ff0589 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -11,6 +11,7 @@
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/settings.h"
+#include "video_core/control/channel_state.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_vulkan/blit_image.h"
@@ -148,14 +149,11 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan
} // Anonymous namespace
RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
- Tegra::MemoryManager& gpu_memory_,
Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_,
const Device& device_, MemoryAllocator& memory_allocator_,
StateTracker& state_tracker_, Scheduler& scheduler_)
- : RasterizerAccelerated{cpu_memory_}, gpu{gpu_},
- gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()},
- screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_},
- state_tracker{state_tracker_}, scheduler{scheduler_},
+ : RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, screen_info{screen_info_}, device{device_},
+ memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_},
staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
update_descriptor_queue(device, scheduler),
blit_image(device, scheduler, state_tracker, descriptor_pool),
@@ -165,14 +163,13 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
memory_allocator, staging_pool,
blit_image, astc_decoder_pass,
render_pass_cache},
- texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
+ texture_cache(texture_cache_runtime, *this),
buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
update_descriptor_queue, descriptor_pool),
- buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
- pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
- descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache,
- texture_cache, gpu.ShaderNotify()),
- query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache},
+ buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
+ pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue,
+ render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
+ query_cache{*this, device, scheduler}, accelerate_dma{buffer_cache},
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
wfi_event(device.GetLogical().CreateEvent()) {
scheduler.SetQueryCache(query_cache);
@@ -199,8 +196,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
UpdateDynamicStates();
- const auto& regs{maxwell3d.regs};
- const u32 num_instances{maxwell3d.mme_draw.instance_count};
+ const auto& regs{maxwell3d->regs};
+ const u32 num_instances{maxwell3d->mme_draw.instance_count};
const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)};
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) {
@@ -218,14 +215,14 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
void RasterizerVulkan::Clear() {
MICROPROFILE_SCOPE(Vulkan_Clearing);
- if (!maxwell3d.ShouldExecute()) {
+ if (!maxwell3d->ShouldExecute()) {
return;
}
FlushWork();
query_cache.UpdateCounters();
- auto& regs = maxwell3d.regs;
+ auto& regs = maxwell3d->regs;
const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A;
const bool use_depth = regs.clear_buffers.Z;
@@ -339,9 +336,9 @@ void RasterizerVulkan::DispatchCompute() {
return;
}
std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
- pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache);
+ pipeline->Configure(*kepler_compute, *gpu_memory, scheduler, buffer_cache, texture_cache);
- const auto& qmd{kepler_compute.launch_description};
+ const auto& qmd{kepler_compute->launch_description};
const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); });
@@ -451,10 +448,11 @@ void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) {
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
if (!gpu.IsAsync()) {
- gpu_memory.Write<u32>(addr, value);
+ gpu_memory->Write<u32>(addr, value);
return;
}
- fence_manager.SignalSemaphore(addr, value);
+ auto paddr = gpu_memory->GetPointer(addr);
+ fence_manager.SignalSemaphore(paddr, value);
}
void RasterizerVulkan::SignalSyncPoint(u32 value) {
@@ -553,12 +551,12 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA()
void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
std::span<u8> memory) {
- auto cpu_addr = gpu_memory.GpuToCpuAddress(address);
+ auto cpu_addr = gpu_memory->GpuToCpuAddress(address);
if (!cpu_addr) [[unlikely]] {
- gpu_memory.WriteBlock(address, memory.data(), copy_size);
+ gpu_memory->WriteBlock(address, memory.data(), copy_size);
return;
}
- gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size);
+ gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size);
{
std::unique_lock<std::mutex> lock{buffer_cache.mutex};
if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
@@ -627,7 +625,7 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64
}
void RasterizerVulkan::UpdateDynamicStates() {
- auto& regs = maxwell3d.regs;
+ auto& regs = maxwell3d->regs;
UpdateViewportsState(regs);
UpdateScissorsState(regs);
UpdateDepthBias(regs);
@@ -651,7 +649,7 @@ void RasterizerVulkan::UpdateDynamicStates() {
}
void RasterizerVulkan::BeginTransformFeedback() {
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
if (regs.tfb_enabled == 0) {
return;
}
@@ -667,7 +665,7 @@ void RasterizerVulkan::BeginTransformFeedback() {
}
void RasterizerVulkan::EndTransformFeedback() {
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
if (regs.tfb_enabled == 0) {
return;
}
@@ -917,7 +915,7 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
}
void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) {
- auto& dirty{maxwell3d.dirty.flags};
+ auto& dirty{maxwell3d->dirty.flags};
if (!dirty[Dirty::VertexInput]) {
return;
}
@@ -974,4 +972,41 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs)
});
}
+void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) {
+ CreateChannel(channel);
+ {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ texture_cache.CreateChannel(channel);
+ buffer_cache.CreateChannel(channel);
+ }
+ pipeline_cache.CreateChannel(channel);
+ query_cache.CreateChannel(channel);
+ state_tracker.SetupTables(channel);
+}
+
+void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) {
+ const s32 channel_id = channel.bind_id;
+ BindToChannel(channel_id);
+ {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ texture_cache.BindToChannel(channel_id);
+ buffer_cache.BindToChannel(channel_id);
+ }
+ pipeline_cache.BindToChannel(channel_id);
+ query_cache.BindToChannel(channel_id);
+ state_tracker.ChangeChannel(channel);
+ scheduler.InvalidateState();
+}
+
+void RasterizerVulkan::ReleaseChannel(s32 channel_id) {
+ EraseChannel(channel_id);
+ {
+ std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
+ texture_cache.EraseChannel(channel_id);
+ buffer_cache.EraseChannel(channel_id);
+ }
+ pipeline_cache.EraseChannel(channel_id);
+ query_cache.EraseChannel(channel_id);
+}
+
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index 0370ea39b..642fe6576 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -8,6 +8,7 @@
#include <boost/container/static_vector.hpp>
#include "common/common_types.h"
+#include "video_core/control/channel_state_cache.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/rasterizer_accelerated.h"
#include "video_core/rasterizer_interface.h"
@@ -54,13 +55,13 @@ private:
BufferCache& buffer_cache;
};
-class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
+class RasterizerVulkan final : public VideoCore::RasterizerAccelerated,
+ protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
public:
explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
- Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
- ScreenInfo& screen_info_, const Device& device_,
- MemoryAllocator& memory_allocator_, StateTracker& state_tracker_,
- Scheduler& scheduler_);
+ Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_,
+ const Device& device_, MemoryAllocator& memory_allocator_,
+ StateTracker& state_tracker_, Scheduler& scheduler_);
~RasterizerVulkan() override;
void Draw(bool is_indexed, bool is_instanced) override;
@@ -99,6 +100,12 @@ public:
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
+ void InitializeChannel(Tegra::Control::ChannelState& channel) override;
+
+ void BindChannel(Tegra::Control::ChannelState& channel) override;
+
+ void ReleaseChannel(s32 channel_id) override;
+
private:
static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48;
@@ -134,9 +141,6 @@ private:
void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);
Tegra::GPU& gpu;
- Tegra::MemoryManager& gpu_memory;
- Tegra::Engines::Maxwell3D& maxwell3d;
- Tegra::Engines::KeplerCompute& kepler_compute;
ScreenInfo& screen_info;
const Device& device;
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 9ad096431..a87bf8dd3 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -7,6 +7,7 @@
#include "common/common_types.h"
#include "core/core.h"
+#include "video_core/control/channel_state.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h"
@@ -174,9 +175,8 @@ void SetupDirtyVertexBindings(Tables& tables) {
}
} // Anonymous namespace
-StateTracker::StateTracker(Tegra::GPU& gpu)
- : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
- auto& tables{gpu.Maxwell3D().dirty.tables};
+void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) {
+ auto& tables{channel_state.maxwell_3d->dirty.tables};
SetupDirtyFlags(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
@@ -199,4 +199,11 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
SetupDirtyVertexBindings(tables);
}
+void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) {
+ flags = &channel_state.maxwell_3d->dirty.flags;
+}
+
+StateTracker::StateTracker(Tegra::GPU& gpu)
+ : flags{}, invalidation_flags{MakeInvalidationFlags()} {}
+
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index a85bc1c10..9f8a887f9 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -10,6 +10,12 @@
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
+namespace Tegra {
+namespace Control {
+struct ChannelState;
+}
+} // namespace Tegra
+
namespace Vulkan {
namespace Dirty {
@@ -56,16 +62,16 @@ public:
explicit StateTracker(Tegra::GPU& gpu);
void InvalidateCommandBufferState() {
- flags |= invalidation_flags;
+ (*flags) |= invalidation_flags;
current_topology = INVALID_TOPOLOGY;
}
void InvalidateViewports() {
- flags[Dirty::Viewports] = true;
+ (*flags)[Dirty::Viewports] = true;
}
void InvalidateScissors() {
- flags[Dirty::Scissors] = true;
+ (*flags)[Dirty::Scissors] = true;
}
bool TouchViewports() {
@@ -139,16 +145,20 @@ public:
return has_changed;
}
+ void SetupTables(Tegra::Control::ChannelState& channel_state);
+
+ void ChangeChannel(Tegra::Control::ChannelState& channel_state);
+
private:
static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u);
bool Exchange(std::size_t id, bool new_value) const noexcept {
- const bool is_dirty = flags[id];
- flags[id] = new_value;
+ const bool is_dirty = (*flags)[id];
+ (*flags)[id] = new_value;
return is_dirty;
}
- Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
+ Tegra::Engines::Maxwell3D::DirtyState::Flags* flags;
Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
};
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index 164e4ee0e..f53066579 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -8,6 +8,7 @@
#include "common/assert.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/object_pool.h"
+#include "video_core/control/channel_state.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
@@ -33,29 +34,25 @@ void ShaderCache::SyncGuestHost() {
RemovePendingShaders();
}
-ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_)
- : gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
- rasterizer{rasterizer_} {}
+ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
- auto& dirty{maxwell3d.dirty.flags};
+ auto& dirty{maxwell3d->dirty.flags};
if (!dirty[VideoCommon::Dirty::Shaders]) {
return last_shaders_valid;
}
dirty[VideoCommon::Dirty::Shaders] = false;
- const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
+ const GPUVAddr base_addr{maxwell3d->regs.code_address.CodeAddress()};
for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) {
- if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
+ if (!maxwell3d->regs.IsShaderConfigEnabled(index)) {
unique_hashes[index] = 0;
continue;
}
- const auto& shader_config{maxwell3d.regs.shader_config[index]};
+ const auto& shader_config{maxwell3d->regs.shader_config[index]};
const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
const GPUVAddr shader_addr{base_addr + shader_config.offset};
- const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
+ const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)};
if (!cpu_shader_addr) {
LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
last_shaders_valid = false;
@@ -64,7 +61,7 @@ bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
if (!shader_info) {
const u32 start_address{shader_config.offset};
- GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
+ GraphicsEnvironment env{*maxwell3d, *gpu_memory, program, base_addr, start_address};
shader_info = MakeShaderInfo(env, *cpu_shader_addr);
}
shader_infos[index] = shader_info;
@@ -75,10 +72,10 @@ bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
}
const ShaderInfo* ShaderCache::ComputeShader() {
- const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
- const auto& qmd{kepler_compute.launch_description};
+ const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
+ const auto& qmd{kepler_compute->launch_description};
const GPUVAddr shader_addr{program_base + qmd.program_start};
- const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
+ const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)};
if (!cpu_shader_addr) {
LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
return nullptr;
@@ -86,22 +83,22 @@ const ShaderInfo* ShaderCache::ComputeShader() {
if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) {
return shader;
}
- ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
+ ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
return MakeShaderInfo(env, *cpu_shader_addr);
}
void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result,
const std::array<u64, NUM_PROGRAMS>& unique_hashes) {
size_t env_index{};
- const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
+ const GPUVAddr base_addr{maxwell3d->regs.code_address.CodeAddress()};
for (size_t index = 0; index < NUM_PROGRAMS; ++index) {
if (unique_hashes[index] == 0) {
continue;
}
const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
auto& env{result.envs[index]};
- const u32 start_address{maxwell3d.regs.shader_config[index].offset};
- env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
+ const u32 start_address{maxwell3d->regs.shader_config[index].offset};
+ env = GraphicsEnvironment{*maxwell3d, *gpu_memory, program, base_addr, start_address};
env.SetCachedSize(shader_infos[index]->size_bytes);
result.env_ptrs[env_index++] = &env;
}
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index f67cea8c4..a4391202d 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -12,6 +12,7 @@
#include <vector>
#include "common/common_types.h"
+#include "video_core/control/channel_state_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/shader_environment.h"
@@ -19,6 +20,10 @@ namespace Tegra {
class MemoryManager;
}
+namespace Tegra::Control {
+struct ChannelState;
+}
+
namespace VideoCommon {
class GenericEnvironment;
@@ -28,7 +33,7 @@ struct ShaderInfo {
size_t size_bytes{};
};
-class ShaderCache {
+class ShaderCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
static constexpr u64 YUZU_PAGEBITS = 14;
static constexpr u64 YUZU_PAGESIZE = u64(1) << YUZU_PAGEBITS;
@@ -71,9 +76,7 @@ protected:
}
};
- explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
- Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_);
+ explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_);
/// @brief Update the hashes and information of shader stages
/// @param unique_hashes Shader hashes to store into when a stage is enabled
@@ -88,10 +91,6 @@ protected:
void GetGraphicsEnvironments(GraphicsEnvironments& result,
const std::array<u64, NUM_PROGRAMS>& unique_hashes);
- Tegra::MemoryManager& gpu_memory;
- Tegra::Engines::Maxwell3D& maxwell3d;
- Tegra::Engines::KeplerCompute& kepler_compute;
-
std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{};
bool last_shaders_valid = false;
diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h
index 1f85ec9da..620565684 100644
--- a/src/video_core/texture_cache/image_base.h
+++ b/src/video_core/texture_cache/image_base.h
@@ -88,6 +88,9 @@ struct ImageBase {
u32 scale_rating = 0;
u64 scale_tick = 0;
bool has_scaled = false;
+
+ size_t channel = 0;
+
ImageFlagBits flags = ImageFlagBits::CpuModified;
GPUVAddr gpu_addr = 0;
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 1dbe01bc0..2731aead0 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -7,6 +7,7 @@
#include "common/alignment.h"
#include "common/settings.h"
+#include "video_core/control/channel_state.h"
#include "video_core/dirty_flags.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/texture_cache/image_view_base.h"
@@ -29,12 +30,8 @@ using VideoCore::Surface::SurfaceType;
using namespace Common::Literals;
template <class P>
-TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
- Tegra::Engines::Maxwell3D& maxwell3d_,
- Tegra::Engines::KeplerCompute& kepler_compute_,
- Tegra::MemoryManager& gpu_memory_)
- : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
- kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
+TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_)
+ : runtime{runtime_}, rasterizer{rasterizer_} {
// Configure null sampler
TSCEntry sampler_descriptor{};
sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
@@ -42,6 +39,13 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear);
sampler_descriptor.cubemap_anisotropy.Assign(1);
+ // Setup channels
+ current_channel_id = UNSET_CHANNEL;
+ state = nullptr;
+ maxwell3d = nullptr;
+ kepler_compute = nullptr;
+ gpu_memory = nullptr;
+
// Make sure the first index is reserved for the null resources
// This way the null resource becomes a compile time constant
void(slot_images.insert(NullImageParams{}));
@@ -93,7 +97,7 @@ void TextureCache<P>::RunGarbageCollector() {
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(map, copies);
runtime.Finish();
- SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
+ SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
}
if (True(image.flags & ImageFlagBits::Tracked)) {
UntrackImage(image, image_id);
@@ -152,22 +156,23 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
template <class P>
template <bool has_blacklists>
void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
- FillImageViews<has_blacklists>(graphics_image_table, graphics_image_view_ids, views);
+ FillImageViews<has_blacklists>(state->graphics_image_table, state->graphics_image_view_ids,
+ views);
}
template <class P>
void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
- FillImageViews<true>(compute_image_table, compute_image_view_ids, views);
+ FillImageViews<true>(state->compute_image_table, state->compute_image_view_ids, views);
}
template <class P>
typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
- if (index > graphics_sampler_table.Limit()) {
+ if (index > state->graphics_sampler_table.Limit()) {
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
return &slot_samplers[NULL_SAMPLER_ID];
}
- const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
- SamplerId& id = graphics_sampler_ids[index];
+ const auto [descriptor, is_new] = state->graphics_sampler_table.Read(index);
+ SamplerId& id = state->graphics_sampler_ids[index];
if (is_new) {
id = FindSampler(descriptor);
}
@@ -176,12 +181,12 @@ typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
template <class P>
typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
- if (index > compute_sampler_table.Limit()) {
+ if (index > state->compute_sampler_table.Limit()) {
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
return &slot_samplers[NULL_SAMPLER_ID];
}
- const auto [descriptor, is_new] = compute_sampler_table.Read(index);
- SamplerId& id = compute_sampler_ids[index];
+ const auto [descriptor, is_new] = state->compute_sampler_table.Read(index);
+ SamplerId& id = state->compute_sampler_ids[index];
if (is_new) {
id = FindSampler(descriptor);
}
@@ -191,34 +196,34 @@ typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
template <class P>
void TextureCache<P>::SynchronizeGraphicsDescriptors() {
using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
- const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex;
- const u32 tic_limit = maxwell3d.regs.tic.limit;
- const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit;
- if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) {
- graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+ const bool linked_tsc = maxwell3d->regs.sampler_index == SamplerIndex::ViaHeaderIndex;
+ const u32 tic_limit = maxwell3d->regs.tic.limit;
+ const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tsc.limit;
+ if (state->graphics_sampler_table.Synchornize(maxwell3d->regs.tsc.Address(), tsc_limit)) {
+ state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
}
- if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) {
- graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+ if (state->graphics_image_table.Synchornize(maxwell3d->regs.tic.Address(), tic_limit)) {
+ state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
}
}
template <class P>
void TextureCache<P>::SynchronizeComputeDescriptors() {
- const bool linked_tsc = kepler_compute.launch_description.linked_tsc;
- const u32 tic_limit = kepler_compute.regs.tic.limit;
- const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit;
- const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address();
- if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
- compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+ const bool linked_tsc = kepler_compute->launch_description.linked_tsc;
+ const u32 tic_limit = kepler_compute->regs.tic.limit;
+ const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
+ const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address();
+ if (state->compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
+ state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
}
- if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) {
- compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+ if (state->compute_image_table.Synchornize(kepler_compute->regs.tic.Address(), tic_limit)) {
+ state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
}
}
template <class P>
bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
u32 scale_rating = 0;
bool rescaled = false;
std::array<ImageId, NUM_RT> tmp_color_images{};
@@ -315,7 +320,7 @@ bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
template <class P>
void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
using namespace VideoCommon::Dirty;
- auto& flags = maxwell3d.dirty.flags;
+ auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::RenderTargets]) {
for (size_t index = 0; index < NUM_RT; ++index) {
ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
@@ -342,7 +347,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
for (size_t index = 0; index < NUM_RT; ++index) {
- render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
+ render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d->regs.rt_control.Map(index));
}
u32 up_scale = 1;
u32 down_shift = 0;
@@ -351,8 +356,8 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
down_shift = Settings::values.resolution_info.down_shift;
}
render_targets.size = Extent2D{
- (maxwell3d.regs.render_area.width * up_scale) >> down_shift,
- (maxwell3d.regs.render_area.height * up_scale) >> down_shift,
+ (maxwell3d->regs.render_area.width * up_scale) >> down_shift,
+ (maxwell3d->regs.render_area.height * up_scale) >> down_shift,
};
flags[Dirty::DepthBiasGlobal] = true;
@@ -458,7 +463,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(map, copies);
runtime.Finish();
- SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
+ SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
}
}
@@ -655,7 +660,7 @@ void TextureCache<P>::PopAsyncFlushes() {
for (const ImageId image_id : download_ids) {
const ImageBase& image = slot_images[image_id];
const auto copies = FullDownloadCopies(image.info);
- SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span);
+ SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span);
download_map.offset += image.unswizzled_size_bytes;
download_span = download_span.subspan(image.unswizzled_size_bytes);
}
@@ -714,26 +719,26 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
const GPUVAddr gpu_addr = image.gpu_addr;
if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
- gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
+ gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
const auto uploads = FullUploadSwizzles(image.info);
runtime.AccelerateImageUpload(image, staging, uploads);
} else if (True(image.flags & ImageFlagBits::Converted)) {
std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
- auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
+ auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, unswizzled_data);
ConvertImage(unswizzled_data, image.info, mapped_span, copies);
image.UploadMemory(staging, copies);
} else {
- const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
+ const auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, mapped_span);
image.UploadMemory(staging, copies);
}
}
template <class P>
ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
- if (!IsValidEntry(gpu_memory, config)) {
+ if (!IsValidEntry(*gpu_memory, config)) {
return NULL_IMAGE_VIEW_ID;
}
- const auto [pair, is_new] = image_views.try_emplace(config);
+ const auto [pair, is_new] = state->image_views.try_emplace(config);
ImageViewId& image_view_id = pair->second;
if (is_new) {
image_view_id = CreateImageView(config);
@@ -777,9 +782,9 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
template <class P>
ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options) {
- std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
- cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
+ cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
if (!cpu_addr) {
return ImageId{};
}
@@ -860,7 +865,7 @@ void TextureCache<P>::InvalidateScale(Image& image) {
image.scale_tick = frame_tick + 1;
}
const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
- auto& dirty = maxwell3d.dirty.flags;
+ auto& dirty = maxwell3d->dirty.flags;
dirty[Dirty::RenderTargets] = true;
dirty[Dirty::ZetaBuffer] = true;
for (size_t rt = 0; rt < NUM_RT; ++rt) {
@@ -881,11 +886,11 @@ void TextureCache<P>::InvalidateScale(Image& image) {
image.image_view_ids.clear();
image.image_view_infos.clear();
if constexpr (ENABLE_VALIDATION) {
- std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
- std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
+ std::ranges::fill(state->graphics_image_view_ids, CORRUPT_ID);
+ std::ranges::fill(state->compute_image_view_ids, CORRUPT_ID);
}
- graphics_image_table.Invalidate();
- compute_image_table.Invalidate();
+ state->graphics_image_table.Invalidate();
+ state->compute_image_table.Invalidate();
has_deleted_images = true;
}
@@ -929,10 +934,10 @@ bool TextureCache<P>::ScaleDown(Image& image) {
template <class P>
ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options) {
- std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
const auto size = CalculateGuestSizeInBytes(info);
- cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
+ cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size);
if (!cpu_addr) {
const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
virtual_invalid_space += Common::AlignUp(size, 32);
@@ -1050,7 +1055,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
Image& new_image = slot_images[new_image_id];
- if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
+ if (!gpu_memory->IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
new_image.flags |= ImageFlagBits::Sparse;
}
@@ -1192,7 +1197,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
return NULL_SAMPLER_ID;
}
- const auto [pair, is_new] = samplers.try_emplace(config);
+ const auto [pair, is_new] = state->samplers.try_emplace(config);
if (is_new) {
pair->second = slot_samplers.insert(runtime, config);
}
@@ -1201,7 +1206,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
template <class P>
ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
if (index >= regs.rt_control.count) {
return ImageViewId{};
}
@@ -1219,7 +1224,7 @@ ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
template <class P>
ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
if (!regs.zeta_enable) {
return ImageViewId{};
}
@@ -1321,8 +1326,8 @@ void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Fu
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
boost::container::small_vector<ImageId, 8> images;
ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
- const auto it = gpu_page_table.find(page);
- if (it == gpu_page_table.end()) {
+ const auto it = state->gpu_page_table.find(page);
+ if (it == state->gpu_page_table.end()) {
if constexpr (BOOL_BREAK) {
return false;
} else {
@@ -1403,9 +1408,9 @@ template <typename Func>
void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
- const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
+ const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
for (const auto& [gpu_addr, size] : segments) {
- std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
if constexpr (RETURNS_BOOL) {
if (func(gpu_addr, *cpu_addr, size)) {
@@ -1449,7 +1454,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
image.lru_index = lru_cache.Insert(image_id, frame_tick);
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
- [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
+ [this, image_id](u64 page) { state->gpu_page_table[page].push_back(image_id); });
if (False(image.flags & ImageFlagBits::Sparse)) {
auto map_id =
slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
@@ -1497,8 +1502,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
}
image_ids.erase(vector_it);
};
- ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
- [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
+ ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
+ clear_page_table(page, state->gpu_page_table);
+ });
if (False(image.flags & ImageFlagBits::Sparse)) {
const auto map_id = image.map_view_id;
ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
@@ -1631,7 +1637,7 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
// Mark render targets as dirty
- auto& dirty = maxwell3d.dirty.flags;
+ auto& dirty = maxwell3d->dirty.flags;
dirty[Dirty::RenderTargets] = true;
dirty[Dirty::ZetaBuffer] = true;
for (size_t rt = 0; rt < NUM_RT; ++rt) {
@@ -1681,22 +1687,24 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
if (alloc_images.empty()) {
image_allocs_table.erase(alloc_it);
}
- if constexpr (ENABLE_VALIDATION) {
- std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
- std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
+ for (auto& this_state : channel_storage) {
+ if constexpr (ENABLE_VALIDATION) {
+ std::ranges::fill(this_state.graphics_image_view_ids, CORRUPT_ID);
+ std::ranges::fill(this_state.compute_image_view_ids, CORRUPT_ID);
+ }
+ this_state.graphics_image_table.Invalidate();
+ this_state.compute_image_table.Invalidate();
}
- graphics_image_table.Invalidate();
- compute_image_table.Invalidate();
has_deleted_images = true;
}
template <class P>
void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
- auto it = image_views.begin();
- while (it != image_views.end()) {
+ auto it = state->image_views.begin();
+ while (it != state->image_views.end()) {
const auto found = std::ranges::find(removed_views, it->second);
if (found != removed_views.end()) {
- it = image_views.erase(it);
+ it = state->image_views.erase(it);
} else {
++it;
}
@@ -1943,7 +1951,7 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
const ImageViewBase& image_view = slot_image_views[id];
const ImageBase& image = slot_images[image_view.image_id];
const Extent3D size = image_view.size;
- const auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d->regs;
const auto& scissor = regs.scissor_test[0];
if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
// Images with multiple resources can't be cleared in a single call
@@ -1958,4 +1966,61 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
scissor.max_y >= size.height;
}
+template <class P>
+TextureCache<P>::ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& state) noexcept
+ : maxwell3d{*state.maxwell_3d}, kepler_compute{*state.kepler_compute},
+ gpu_memory{*state.memory_manager}, graphics_image_table{gpu_memory},
+ graphics_sampler_table{gpu_memory}, compute_image_table{gpu_memory}, compute_sampler_table{
+ gpu_memory} {}
+
+template <class P>
+void TextureCache<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
+ ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0);
+ auto new_id = [this, &channel]() {
+ if (!free_channel_ids.empty()) {
+ auto id = free_channel_ids.front();
+ free_channel_ids.pop_front();
+ new (&channel_storage[id]) ChannelInfo(channel);
+ return id;
+ }
+ channel_storage.emplace_back(channel);
+ return channel_storage.size() - 1;
+ }();
+ channel_map.emplace(channel.bind_id, new_id);
+ if (current_channel_id != UNSET_CHANNEL) {
+ state = &channel_storage[current_channel_id];
+ }
+}
+
+/// Bind a channel for execution.
+template <class P>
+void TextureCache<P>::BindToChannel(s32 id) {
+ auto it = channel_map.find(id);
+ ASSERT(it != channel_map.end() && id >= 0);
+ current_channel_id = it->second;
+ state = &channel_storage[current_channel_id];
+ maxwell3d = &state->maxwell3d;
+ kepler_compute = &state->kepler_compute;
+ gpu_memory = &state->gpu_memory;
+}
+
+/// Erase channel's state.
+template <class P>
+void TextureCache<P>::EraseChannel(s32 id) {
+ const auto it = channel_map.find(id);
+ ASSERT(it != channel_map.end() && id >= 0);
+ const auto this_id = it->second;
+ free_channel_ids.push_back(this_id);
+ channel_map.erase(it);
+ if (this_id == current_channel_id) {
+ current_channel_id = UNSET_CHANNEL;
+ state = nullptr;
+ maxwell3d = nullptr;
+ kepler_compute = nullptr;
+ gpu_memory = nullptr;
+ } else if (current_channel_id != UNSET_CHANNEL) {
+ state = &channel_storage[current_channel_id];
+ }
+}
+
} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index 7e6c6cef2..69efcb718 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -3,6 +3,8 @@
#pragma once
+#include <deque>
+#include <limits>
#include <mutex>
#include <span>
#include <type_traits>
@@ -26,6 +28,10 @@
#include "video_core/texture_cache/types.h"
#include "video_core/textures/texture.h"
+namespace Tegra::Control {
+struct ChannelState;
+}
+
namespace VideoCommon {
using Tegra::Texture::SwizzleSource;
@@ -58,6 +64,8 @@ class TextureCache {
/// True when the API can provide info about the memory of the device.
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
+ static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
+
static constexpr s64 TARGET_THRESHOLD = 4_GiB;
static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB;
static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB;
@@ -85,8 +93,7 @@ class TextureCache {
};
public:
- explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
- Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
+ explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&);
/// Notify the cache that a new frame has been queued
void TickFrame();
@@ -171,6 +178,15 @@ public:
[[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept;
+ /// Create channel state.
+ void CreateChannel(struct Tegra::Control::ChannelState& channel);
+
+ /// Bind a channel for execution.
+ void BindToChannel(s32 id);
+
+ /// Erase channel's state.
+ void EraseChannel(s32 id);
+
std::mutex mutex;
private:
@@ -338,31 +354,52 @@ private:
u64 GetScaledImageSizeBytes(ImageBase& image);
Runtime& runtime;
- VideoCore::RasterizerInterface& rasterizer;
- Tegra::Engines::Maxwell3D& maxwell3d;
- Tegra::Engines::KeplerCompute& kepler_compute;
- Tegra::MemoryManager& gpu_memory;
- DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
- DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
- std::vector<SamplerId> graphics_sampler_ids;
- std::vector<ImageViewId> graphics_image_view_ids;
+ struct ChannelInfo {
+ ChannelInfo() = delete;
+ ChannelInfo(struct Tegra::Control::ChannelState& state) noexcept;
+ ChannelInfo(const ChannelInfo& state) = delete;
+ ChannelInfo& operator=(const ChannelInfo&) = delete;
+ ChannelInfo(ChannelInfo&& other) noexcept = default;
+ ChannelInfo& operator=(ChannelInfo&& other) noexcept = default;
+
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+ Tegra::MemoryManager& gpu_memory;
+
+ DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
+ DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
+ std::vector<SamplerId> graphics_sampler_ids;
+ std::vector<ImageViewId> graphics_image_view_ids;
- DescriptorTable<TICEntry> compute_image_table{gpu_memory};
- DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
- std::vector<SamplerId> compute_sampler_ids;
- std::vector<ImageViewId> compute_image_view_ids;
+ DescriptorTable<TICEntry> compute_image_table{gpu_memory};
+ DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
+ std::vector<SamplerId> compute_sampler_ids;
+ std::vector<ImageViewId> compute_image_view_ids;
+
+ std::unordered_map<TICEntry, ImageViewId> image_views;
+ std::unordered_map<TSCEntry, SamplerId> samplers;
+
+ std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
+ };
+
+ std::deque<ChannelInfo> channel_storage;
+ std::deque<size_t> free_channel_ids;
+ std::unordered_map<s32, size_t> channel_map;
+
+ ChannelInfo* state;
+ size_t current_channel_id{UNSET_CHANNEL};
+ VideoCore::RasterizerInterface& rasterizer;
+ Tegra::Engines::Maxwell3D* maxwell3d;
+ Tegra::Engines::KeplerCompute* kepler_compute;
+ Tegra::MemoryManager* gpu_memory;
RenderTargets render_targets;
- std::unordered_map<TICEntry, ImageViewId> image_views;
- std::unordered_map<TSCEntry, SamplerId> samplers;
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
- std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
-
std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
VAddr virtual_invalid_space{};