summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt22
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h51
-rw-r--r--src/video_core/engines/fermi_2d.cpp22
-rw-r--r--src/video_core/engines/fermi_2d.h13
-rw-r--r--src/video_core/engines/kepler_compute.cpp17
-rw-r--r--src/video_core/engines/kepler_compute.h16
-rw-r--r--src/video_core/engines/maxwell_3d.cpp37
-rw-r--r--src/video_core/engines/maxwell_3d.h21
-rw-r--r--src/video_core/engines/maxwell_dma.cpp2
-rw-r--r--src/video_core/engines/shader_header.h13
-rw-r--r--src/video_core/fence_manager.h32
-rw-r--r--src/video_core/gpu.cpp31
-rw-r--r--src/video_core/gpu.h16
-rw-r--r--src/video_core/gpu_asynch.cpp10
-rw-r--r--src/video_core/gpu_asynch.h4
-rw-r--r--src/video_core/gpu_synch.cpp8
-rw-r--r--src/video_core/gpu_synch.h6
-rw-r--r--src/video_core/host_shaders/CMakeLists.txt43
-rw-r--r--src/video_core/host_shaders/StringShaderHeader.cmake11
-rw-r--r--src/video_core/host_shaders/opengl_present.frag10
-rw-r--r--src/video_core/host_shaders/opengl_present.vert24
-rw-r--r--src/video_core/host_shaders/source_shader.h.in9
-rw-r--r--src/video_core/macro/macro.cpp2
-rw-r--r--src/video_core/macro/macro_hle.cpp6
-rw-r--r--src/video_core/macro/macro_interpreter.cpp1
-rw-r--r--src/video_core/macro/macro_jit_x64.cpp10
-rw-r--r--src/video_core/memory_manager.cpp22
-rw-r--r--src/video_core/memory_manager.h41
-rw-r--r--src/video_core/query_cache.h37
-rw-r--r--src/video_core/rasterizer_interface.h7
-rw-r--r--src/video_core/renderer_base.cpp4
-rw-r--r--src/video_core/renderer_base.h22
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp5
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h3
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.cpp12
-rw-r--r--src/video_core/renderer_opengl/gl_fence_manager.h7
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.cpp13
-rw-r--r--src/video_core/renderer_opengl/gl_query_cache.h14
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp261
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h26
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp9
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h3
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp64
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h23
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp16
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp33
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h16
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.cpp15
-rw-r--r--src/video_core/renderer_opengl/gl_shader_util.h2
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.cpp6
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.h28
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp12
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h8
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h6
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp348
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h44
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp3
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.h10
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp24
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp51
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.h25
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.cpp37
-rw-r--r--src/video_core/renderer_vulkan/vk_blit_screen.h25
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.cpp17
-rw-r--r--src/video_core/renderer_vulkan/vk_buffer_cache.h7
-rw-r--r--src/video_core/renderer_vulkan/vk_command_pool.cpp46
-rw-r--r--src/video_core/renderer_vulkan/vk_command_pool.h34
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.cpp23
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pass.h4
-rw-r--r--src/video_core/renderer_vulkan/vk_compute_pipeline.cpp2
-rw-r--r--src/video_core/renderer_vulkan/vk_descriptor_pool.cpp19
-rw-r--r--src/video_core/renderer_vulkan/vk_descriptor_pool.h15
-rw-r--r--src/video_core/renderer_vulkan/vk_device.cpp32
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.cpp18
-rw-r--r--src/video_core/renderer_vulkan/vk_fence_manager.h8
-rw-r--r--src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp7
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.cpp56
-rw-r--r--src/video_core/renderer_vulkan/vk_master_semaphore.h70
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp95
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.h29
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.cpp57
-rw-r--r--src/video_core/renderer_vulkan/vk_query_cache.h23
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp142
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h20
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_manager.cpp311
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_manager.h196
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_pool.cpp63
-rw-r--r--src/video_core/renderer_vulkan/vk_resource_pool.h43
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp77
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.h70
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp49
-rw-r--r--src/video_core/renderer_vulkan/vk_staging_buffer_pool.h14
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.cpp19
-rw-r--r--src/video_core/renderer_vulkan/vk_state_tracker.h28
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.cpp12
-rw-r--r--src/video_core/renderer_vulkan/vk_stream_buffer.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.cpp20
-rw-r--r--src/video_core/renderer_vulkan/vk_swapchain.h9
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.cpp25
-rw-r--r--src/video_core/renderer_vulkan/vk_texture_cache.h19
-rw-r--r--src/video_core/renderer_vulkan/wrapper.cpp71
-rw-r--r--src/video_core/renderer_vulkan/wrapper.h48
-rw-r--r--src/video_core/shader/ast.h25
-rw-r--r--src/video_core/shader/async_shaders.cpp6
-rw-r--r--src/video_core/shader/async_shaders.h37
-rw-r--r--src/video_core/shader/control_flow.cpp27
-rw-r--r--src/video_core/shader/decode/arithmetic_half.cpp3
-rw-r--r--src/video_core/shader/decode/arithmetic_integer_immediate.cpp35
-rw-r--r--src/video_core/shader/decode/image.cpp15
-rw-r--r--src/video_core/shader/decode/memory.cpp3
-rw-r--r--src/video_core/shader/decode/texture.cpp43
-rw-r--r--src/video_core/shader/memory_util.cpp7
-rw-r--r--src/video_core/shader/memory_util.h6
-rw-r--r--src/video_core/shader/registry.cpp50
-rw-r--r--src/video_core/shader/registry.h2
-rw-r--r--src/video_core/shader/track.cpp4
-rw-r--r--src/video_core/texture_cache/surface_base.cpp10
-rw-r--r--src/video_core/texture_cache/surface_params.cpp11
-rw-r--r--src/video_core/texture_cache/surface_params.h5
-rw-r--r--src/video_core/texture_cache/texture_cache.h51
-rw-r--r--src/video_core/video_core.cpp35
122 files changed, 1805 insertions, 2093 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 3cd896a0f..3df54816d 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,3 +1,5 @@
+add_subdirectory(host_shaders)
+
add_library(video_core STATIC
buffer_cache/buffer_block.h
buffer_cache/buffer_cache.h
@@ -188,6 +190,8 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_blit_screen.h
renderer_vulkan/vk_buffer_cache.cpp
renderer_vulkan/vk_buffer_cache.h
+ renderer_vulkan/vk_command_pool.cpp
+ renderer_vulkan/vk_command_pool.h
renderer_vulkan/vk_compute_pass.cpp
renderer_vulkan/vk_compute_pass.h
renderer_vulkan/vk_compute_pipeline.cpp
@@ -202,6 +206,8 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_graphics_pipeline.h
renderer_vulkan/vk_image.cpp
renderer_vulkan/vk_image.h
+ renderer_vulkan/vk_master_semaphore.cpp
+ renderer_vulkan/vk_master_semaphore.h
renderer_vulkan/vk_memory_manager.cpp
renderer_vulkan/vk_memory_manager.h
renderer_vulkan/vk_pipeline_cache.cpp
@@ -212,8 +218,8 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_rasterizer.h
renderer_vulkan/vk_renderpass_cache.cpp
renderer_vulkan/vk_renderpass_cache.h
- renderer_vulkan/vk_resource_manager.cpp
- renderer_vulkan/vk_resource_manager.h
+ renderer_vulkan/vk_resource_pool.cpp
+ renderer_vulkan/vk_resource_pool.h
renderer_vulkan/vk_sampler_cache.cpp
renderer_vulkan/vk_sampler_cache.h
renderer_vulkan/vk_scheduler.cpp
@@ -244,6 +250,9 @@ create_target_directory_groups(video_core)
target_link_libraries(video_core PUBLIC common core)
target_link_libraries(video_core PRIVATE glad xbyak)
+add_dependencies(video_core host_shaders)
+target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
+
if (ENABLE_VULKAN)
target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include)
target_compile_definitions(video_core PRIVATE HAS_VULKAN)
@@ -264,5 +273,12 @@ endif()
if (MSVC)
target_compile_options(video_core PRIVATE /we4267)
else()
- target_compile_options(video_core PRIVATE -Werror=conversion -Wno-error=sign-conversion)
+ target_compile_options(video_core PRIVATE
+ -Werror=conversion
+ -Wno-error=sign-conversion
+ -Werror=switch
+ -Werror=unused-variable
+ -Werror=unused-but-set-variable
+ -Werror=class-memaccess
+ )
endif()
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index b5dc68902..e7edd733f 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -51,46 +51,43 @@ public:
bool is_written = false, bool use_fast_cbuf = false) {
std::lock_guard lock{mutex};
- auto& memory_manager = system.GPU().MemoryManager();
- const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
- if (!cpu_addr_opt) {
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ if (!cpu_addr) {
return GetEmptyBuffer(size);
}
- const VAddr cpu_addr = *cpu_addr_opt;
// Cache management is a big overhead, so only cache entries with a given size.
// TODO: Figure out which size is the best for given games.
constexpr std::size_t max_stream_size = 0x800;
if (use_fast_cbuf || size < max_stream_size) {
- if (!is_written && !IsRegionWritten(cpu_addr, cpu_addr + size - 1)) {
- const bool is_granular = memory_manager.IsGranularRange(gpu_addr, size);
+ if (!is_written && !IsRegionWritten(*cpu_addr, *cpu_addr + size - 1)) {
+ const bool is_granular = gpu_memory.IsGranularRange(gpu_addr, size);
if (use_fast_cbuf) {
u8* dest;
if (is_granular) {
- dest = memory_manager.GetPointer(gpu_addr);
+ dest = gpu_memory.GetPointer(gpu_addr);
} else {
staging_buffer.resize(size);
dest = staging_buffer.data();
- memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
+ gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
}
return ConstBufferUpload(dest, size);
}
if (is_granular) {
- u8* const host_ptr = memory_manager.GetPointer(gpu_addr);
+ u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
return StreamBufferUpload(size, alignment, [host_ptr, size](u8* dest) {
std::memcpy(dest, host_ptr, size);
});
} else {
- return StreamBufferUpload(
- size, alignment, [&memory_manager, gpu_addr, size](u8* dest) {
- memory_manager.ReadBlockUnsafe(gpu_addr, dest, size);
- });
+ return StreamBufferUpload(size, alignment, [this, gpu_addr, size](u8* dest) {
+ gpu_memory.ReadBlockUnsafe(gpu_addr, dest, size);
+ });
}
}
}
- Buffer* const block = GetBlock(cpu_addr, size);
- MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
+ Buffer* const block = GetBlock(*cpu_addr, size);
+ MapInterval* const map = MapAddress(block, gpu_addr, *cpu_addr, size);
if (!map) {
return GetEmptyBuffer(size);
}
@@ -106,7 +103,7 @@ public:
}
}
- return BufferInfo{block->Handle(), block->Offset(cpu_addr), block->Address()};
+ return BufferInfo{block->Handle(), block->Offset(*cpu_addr), block->Address()};
}
/// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
@@ -262,9 +259,11 @@ public:
virtual BufferInfo GetEmptyBuffer(std::size_t size) = 0;
protected:
- explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
- std::unique_ptr<StreamBuffer> stream_buffer)
- : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)} {}
+ explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
+ std::unique_ptr<StreamBuffer> stream_buffer_)
+ : rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_},
+ stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {}
~BufferCache() = default;
@@ -326,14 +325,13 @@ private:
MapInterval* MapAddress(Buffer* block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) {
const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
if (overlaps.empty()) {
- auto& memory_manager = system.GPU().MemoryManager();
const VAddr cpu_addr_end = cpu_addr + size;
- if (memory_manager.IsGranularRange(gpu_addr, size)) {
- u8* host_ptr = memory_manager.GetPointer(gpu_addr);
+ if (gpu_memory.IsGranularRange(gpu_addr, size)) {
+ u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
block->Upload(block->Offset(cpu_addr), size, host_ptr);
} else {
staging_buffer.resize(size);
- memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
+ gpu_memory.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
block->Upload(block->Offset(cpu_addr), size, staging_buffer.data());
}
return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
@@ -392,7 +390,7 @@ private:
continue;
}
staging_buffer.resize(size);
- system.Memory().ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
+ cpu_memory.ReadBlockUnsafe(interval.lower(), staging_buffer.data(), size);
block->Upload(block->Offset(interval.lower()), size, staging_buffer.data());
}
}
@@ -431,7 +429,7 @@ private:
const std::size_t size = map->end - map->start;
staging_buffer.resize(size);
block->Download(block->Offset(map->start), size, staging_buffer.data());
- system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
+ cpu_memory.WriteBlockUnsafe(map->start, staging_buffer.data(), size);
map->MarkAsModified(false, 0);
}
@@ -567,7 +565,8 @@ private:
}
VideoCore::RasterizerInterface& rasterizer;
- Core::System& system;
+ Tegra::MemoryManager& gpu_memory;
+ Core::Memory::Memory& cpu_memory;
std::unique_ptr<StreamBuffer> stream_buffer;
BufferType stream_buffer_handle;
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index ff10ff40d..9409c4075 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -10,7 +10,13 @@
namespace Tegra::Engines {
-Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
+Fermi2D::Fermi2D() = default;
+
+Fermi2D::~Fermi2D() = default;
+
+void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
+ rasterizer = &rasterizer_;
+}
void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
@@ -81,13 +87,13 @@ void Fermi2D::HandleSurfaceCopy() {
const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2,
dst_blit_y2};
- Config copy_config;
- copy_config.operation = regs.operation;
- copy_config.filter = regs.blit_control.filter;
- copy_config.src_rect = src_rect;
- copy_config.dst_rect = dst_rect;
-
- if (!rasterizer.AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
+ const Config copy_config{
+ .operation = regs.operation,
+ .filter = regs.blit_control.filter,
+ .src_rect = src_rect,
+ .dst_rect = dst_rect,
+ };
+ if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
UNIMPLEMENTED();
}
}
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 8f37d053f..0909709ec 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -34,8 +34,11 @@ namespace Tegra::Engines {
class Fermi2D final : public EngineInterface {
public:
- explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer);
- ~Fermi2D() = default;
+ explicit Fermi2D();
+ ~Fermi2D();
+
+ /// Binds a rasterizer to this engine.
+ void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
/// Write the value to the register identified by method.
void CallMethod(u32 method, u32 method_argument, bool is_last_call) override;
@@ -142,14 +145,14 @@ public:
} regs{};
struct Config {
- Operation operation;
- Filter filter;
+ Operation operation{};
+ Filter filter{};
Common::Rectangle<u32> src_rect;
Common::Rectangle<u32> dst_rect;
};
private:
- VideoCore::RasterizerInterface& rasterizer;
+ VideoCore::RasterizerInterface* rasterizer;
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index a82b06a38..898370739 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -16,14 +16,15 @@
namespace Tegra::Engines {
-KeplerCompute::KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- MemoryManager& memory_manager)
- : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager}, upload_state{
- memory_manager,
- regs.upload} {}
+KeplerCompute::KeplerCompute(Core::System& system_, MemoryManager& memory_manager_)
+ : system{system_}, memory_manager{memory_manager_}, upload_state{memory_manager, regs.upload} {}
KeplerCompute::~KeplerCompute() = default;
+void KeplerCompute::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
+ rasterizer = &rasterizer_;
+}
+
void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
ASSERT_MSG(method < Regs::NUM_REGS,
"Invalid KeplerCompute register, increase the size of the Regs structure");
@@ -104,11 +105,11 @@ SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
}
VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
- return rasterizer.AccessGuestDriverProfile();
+ return rasterizer->AccessGuestDriverProfile();
}
const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const {
- return rasterizer.AccessGuestDriverProfile();
+ return rasterizer->AccessGuestDriverProfile();
}
void KeplerCompute::ProcessLaunch() {
@@ -119,7 +120,7 @@ void KeplerCompute::ProcessLaunch() {
const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start;
LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr);
- rasterizer.DispatchCompute(code_addr);
+ rasterizer->DispatchCompute(code_addr);
}
Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index b7f668d88..7f2500aab 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -42,10 +42,12 @@ namespace Tegra::Engines {
class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface {
public:
- explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- MemoryManager& memory_manager);
+ explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager);
~KeplerCompute();
+ /// Binds a rasterizer to this engine.
+ void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+
static constexpr std::size_t NumConstBuffers = 8;
struct Regs {
@@ -230,11 +232,6 @@ public:
const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
private:
- Core::System& system;
- VideoCore::RasterizerInterface& rasterizer;
- MemoryManager& memory_manager;
- Upload::State upload_state;
-
void ProcessLaunch();
/// Retrieves information about a specific TIC entry from the TIC buffer.
@@ -242,6 +239,11 @@ private:
/// Retrieves information about a specific TSC entry from the TSC buffer.
Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
+
+ Core::System& system;
+ MemoryManager& memory_manager;
+ VideoCore::RasterizerInterface* rasterizer = nullptr;
+ Upload::State upload_state;
};
#define ASSERT_REG_POSITION(field_name, position) \
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index c01436295..57ebc785f 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -22,14 +22,19 @@ using VideoCore::QueryType;
/// First register id that is actually a Macro call.
constexpr u32 MacroRegistersStart = 0xE00;
-Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- MemoryManager& memory_manager)
- : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
- macro_engine{GetMacroEngine(*this)}, upload_state{memory_manager, regs.upload} {
+Maxwell3D::Maxwell3D(Core::System& system_, MemoryManager& memory_manager_)
+ : system{system_}, memory_manager{memory_manager_}, macro_engine{GetMacroEngine(*this)},
+ upload_state{memory_manager, regs.upload} {
dirty.flags.flip();
InitializeRegisterDefaults();
}
+Maxwell3D::~Maxwell3D() = default;
+
+void Maxwell3D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
+ rasterizer = &rasterizer_;
+}
+
void Maxwell3D::InitializeRegisterDefaults() {
// Initializes registers to their default values - what games expect them to be at boot. This is
// for certain registers that may not be explicitly set by games.
@@ -192,7 +197,7 @@ void Maxwell3D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
switch (method) {
case MAXWELL3D_REG_INDEX(wait_for_idle): {
- rasterizer.WaitForIdle();
+ rasterizer->WaitForIdle();
break;
}
case MAXWELL3D_REG_INDEX(shadow_ram_control): {
@@ -402,7 +407,7 @@ void Maxwell3D::FlushMMEInlineDraw() {
const bool is_indexed = mme_draw.current_mode == MMEDrawMode::Indexed;
if (ShouldExecute()) {
- rasterizer.Draw(is_indexed, true);
+ rasterizer->Draw(is_indexed, true);
}
// TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
@@ -465,7 +470,7 @@ void Maxwell3D::ProcessQueryGet() {
switch (regs.query.query_get.operation) {
case Regs::QueryOperation::Release:
if (regs.query.query_get.fence == 1) {
- rasterizer.SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
+ rasterizer->SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
} else {
StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
}
@@ -533,7 +538,7 @@ void Maxwell3D::ProcessQueryCondition() {
void Maxwell3D::ProcessCounterReset() {
switch (regs.counter_reset) {
case Regs::CounterReset::SampleCnt:
- rasterizer.ResetCounter(QueryType::SamplesPassed);
+ rasterizer->ResetCounter(QueryType::SamplesPassed);
break;
default:
LOG_DEBUG(Render_OpenGL, "Unimplemented counter reset={}",
@@ -547,7 +552,7 @@ void Maxwell3D::ProcessSyncPoint() {
const u32 increment = regs.sync_info.increment.Value();
[[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
if (increment) {
- rasterizer.SignalSyncPoint(sync_point);
+ rasterizer->SignalSyncPoint(sync_point);
}
}
@@ -570,7 +575,7 @@ void Maxwell3D::DrawArrays() {
const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count};
if (ShouldExecute()) {
- rasterizer.Draw(is_indexed, false);
+ rasterizer->Draw(is_indexed, false);
}
// TODO(bunnei): Below, we reset vertex count so that we can use these registers to determine if
@@ -590,9 +595,9 @@ std::optional<u64> Maxwell3D::GetQueryResult() {
return 0;
case Regs::QuerySelect::SamplesPassed:
// Deferred.
- rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed,
- system.GPU().GetTicks());
- return {};
+ rasterizer->Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed,
+ system.GPU().GetTicks());
+ return std::nullopt;
default:
LOG_DEBUG(HW_GPU, "Unimplemented query select type {}",
static_cast<u32>(regs.query.query_get.select.Value()));
@@ -718,7 +723,7 @@ void Maxwell3D::ProcessClearBuffers() {
regs.clear_buffers.R == regs.clear_buffers.B &&
regs.clear_buffers.R == regs.clear_buffers.A);
- rasterizer.Clear();
+ rasterizer->Clear();
}
u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
@@ -752,11 +757,11 @@ SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
}
VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
- return rasterizer.AccessGuestDriverProfile();
+ return rasterizer->AccessGuestDriverProfile();
}
const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
- return rasterizer.AccessGuestDriverProfile();
+ return rasterizer->AccessGuestDriverProfile();
}
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index c97eeb792..bc289c55d 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -51,9 +51,11 @@ namespace Tegra::Engines {
class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface {
public:
- explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- MemoryManager& memory_manager);
- ~Maxwell3D() = default;
+ explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager);
+ ~Maxwell3D();
+
+ /// Binds a rasterizer to this engine.
+ void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
/// Register structure of the Maxwell3D engine.
/// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
@@ -1418,12 +1420,12 @@ public:
return execute_on;
}
- VideoCore::RasterizerInterface& GetRasterizer() {
- return rasterizer;
+ VideoCore::RasterizerInterface& Rasterizer() {
+ return *rasterizer;
}
- const VideoCore::RasterizerInterface& GetRasterizer() const {
- return rasterizer;
+ const VideoCore::RasterizerInterface& Rasterizer() const {
+ return *rasterizer;
}
/// Notify a memory write has happened.
@@ -1460,11 +1462,10 @@ private:
void InitializeRegisterDefaults();
Core::System& system;
-
- VideoCore::RasterizerInterface& rasterizer;
-
MemoryManager& memory_manager;
+ VideoCore::RasterizerInterface* rasterizer = nullptr;
+
/// Start offsets of each macro in macro_memory
std::array<u32, 0x80> macro_positions = {};
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index e88290754..8fa359d0a 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -114,8 +114,6 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
const u32 block_depth = src_params.block_size.depth;
const size_t src_size =
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
- const size_t src_layer_size =
- CalculateSize(true, bytes_per_pixel, width, height, 1, block_height, block_depth);
if (read_buffer.size() < src_size) {
read_buffer.resize(src_size);
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index 72e2a33d5..ceec05459 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -41,30 +41,30 @@ struct Header {
BitField<26, 1, u32> does_load_or_store;
BitField<27, 1, u32> does_fp64;
BitField<28, 4, u32> stream_out_mask;
- } common0{};
+ } common0;
union {
BitField<0, 24, u32> shader_local_memory_low_size;
BitField<24, 8, u32> per_patch_attribute_count;
- } common1{};
+ } common1;
union {
BitField<0, 24, u32> shader_local_memory_high_size;
BitField<24, 8, u32> threads_per_input_primitive;
- } common2{};
+ } common2;
union {
BitField<0, 24, u32> shader_local_memory_crs_size;
BitField<24, 4, OutputTopology> output_topology;
BitField<28, 4, u32> reserved;
- } common3{};
+ } common3;
union {
BitField<0, 12, u32> max_output_vertices;
BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
BitField<20, 4, u32> reserved;
BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
- } common4{};
+ } common4;
union {
struct {
@@ -145,7 +145,7 @@ struct Header {
}
} ps;
- std::array<u32, 0xF> raw{};
+ std::array<u32, 0xF> raw;
};
u64 GetLocalMemorySize() const {
@@ -153,7 +153,6 @@ struct Header {
(common2.shader_local_memory_high_size << 24));
}
};
-
static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
} // namespace Tegra::Shader
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 8b2a6a42c..de6991ef6 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -5,15 +5,10 @@
#pragma once
#include <algorithm>
-#include <array>
-#include <memory>
#include <queue>
-#include "common/assert.h"
#include "common/common_types.h"
#include "core/core.h"
-#include "core/memory.h"
-#include "core/settings.h"
#include "video_core/gpu.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
@@ -79,8 +74,6 @@ public:
}
void WaitPendingFences() {
- auto& gpu{system.GPU()};
- auto& memory_manager{gpu.MemoryManager()};
while (!fences.empty()) {
TFence& current_fence = fences.front();
if (ShouldWait()) {
@@ -88,8 +81,8 @@ public:
}
PopAsyncFlushes();
if (current_fence->IsSemaphore()) {
- memory_manager.template Write<u32>(current_fence->GetAddress(),
- current_fence->GetPayload());
+ gpu_memory.template Write<u32>(current_fence->GetAddress(),
+ current_fence->GetPayload());
} else {
gpu.IncrementSyncPoint(current_fence->GetPayload());
}
@@ -98,13 +91,13 @@ public:
}
protected:
- FenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- TTextureCache& texture_cache, TTBufferCache& buffer_cache,
- TQueryCache& query_cache)
- : system{system}, rasterizer{rasterizer}, texture_cache{texture_cache},
- buffer_cache{buffer_cache}, query_cache{query_cache} {}
+ explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
+ TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
+ TQueryCache& query_cache_)
+ : rasterizer{rasterizer_}, gpu{gpu_}, gpu_memory{gpu.MemoryManager()},
+ texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
- virtual ~FenceManager() {}
+ virtual ~FenceManager() = default;
/// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is
/// true
@@ -118,16 +111,15 @@ protected:
/// Waits until a fence has been signalled by the host GPU.
virtual void WaitFence(TFence& fence) = 0;
- Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
+ Tegra::GPU& gpu;
+ Tegra::MemoryManager& gpu_memory;
TTextureCache& texture_cache;
TTBufferCache& buffer_cache;
TQueryCache& query_cache;
private:
void TryReleasePendingFences() {
- auto& gpu{system.GPU()};
- auto& memory_manager{gpu.MemoryManager()};
while (!fences.empty()) {
TFence& current_fence = fences.front();
if (ShouldWait() && !IsFenceSignaled(current_fence)) {
@@ -135,8 +127,8 @@ private:
}
PopAsyncFlushes();
if (current_fence->IsSemaphore()) {
- memory_manager.template Write<u32>(current_fence->GetAddress(),
- current_fence->GetPayload());
+ gpu_memory.template Write<u32>(current_fence->GetAddress(),
+ current_fence->GetPayload());
} else {
gpu.IncrementSyncPoint(current_fence->GetPayload());
}
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 512578c8b..4bb9256e9 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -27,21 +27,28 @@ namespace Tegra {
MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
-GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_, bool is_async)
- : system{system}, renderer{std::move(renderer_)}, is_async{is_async} {
- auto& rasterizer{renderer->Rasterizer()};
- memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
- dma_pusher = std::make_unique<Tegra::DmaPusher>(system, *this);
- maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
- fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer);
- kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
- maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
- kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
- shader_notify = std::make_unique<VideoCore::ShaderNotify>();
-}
+GPU::GPU(Core::System& system_, bool is_async_)
+ : system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(system)},
+ dma_pusher{std::make_unique<Tegra::DmaPusher>(system, *this)},
+ maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
+ fermi_2d{std::make_unique<Engines::Fermi2D>()},
+ kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
+ maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
+ kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
+ shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_} {}
GPU::~GPU() = default;
+void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
+ renderer = std::move(renderer_);
+
+ VideoCore::RasterizerInterface& rasterizer = renderer->Rasterizer();
+ memory_manager->BindRasterizer(rasterizer);
+ maxwell_3d->BindRasterizer(rasterizer);
+ fermi_2d->BindRasterizer(rasterizer);
+ kepler_compute->BindRasterizer(rasterizer);
+}
+
Engines::Maxwell3D& GPU::Maxwell3D() {
return *maxwell_3d;
}
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index ebfc7b0c7..2d15d1c6f 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -142,11 +142,6 @@ class MemoryManager;
class GPU {
public:
- explicit GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
- bool is_async);
-
- virtual ~GPU();
-
struct MethodCall {
u32 method{};
u32 argument{};
@@ -162,6 +157,12 @@ public:
method_count(method_count) {}
};
+ explicit GPU(Core::System& system, bool is_async);
+ virtual ~GPU();
+
+ /// Binds a renderer to the GPU.
+ void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
+
/// Calls a GPU method.
void CallMethod(const MethodCall& method_call);
@@ -345,13 +346,12 @@ private:
bool ExecuteMethodOnEngine(u32 method);
protected:
- std::unique_ptr<Tegra::DmaPusher> dma_pusher;
Core::System& system;
+ std::unique_ptr<Tegra::MemoryManager> memory_manager;
+ std::unique_ptr<Tegra::DmaPusher> dma_pusher;
std::unique_ptr<VideoCore::RendererBase> renderer;
private:
- std::unique_ptr<Tegra::MemoryManager> memory_manager;
-
/// Mapping of command subchannels to their bound engine ids
std::array<EngineID, 8> bound_engines = {};
/// 3D engine
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 7b855f63e..70a3d5738 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -10,16 +10,14 @@
namespace VideoCommon {
-GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer_,
- std::unique_ptr<Core::Frontend::GraphicsContext>&& context)
- : GPU(system, std::move(renderer_), true), gpu_thread{system},
- cpu_context(renderer->GetRenderWindow().CreateSharedContext()),
- gpu_context(std::move(context)) {}
+GPUAsynch::GPUAsynch(Core::System& system) : GPU{system, true}, gpu_thread{system} {}
GPUAsynch::~GPUAsynch() = default;
void GPUAsynch::Start() {
- gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher);
+ gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher);
+ cpu_context = renderer->GetRenderWindow().CreateSharedContext();
+ cpu_context->MakeCurrent();
}
void GPUAsynch::ObtainContext() {
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 15e9f1d38..f89c855a5 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -20,8 +20,7 @@ namespace VideoCommon {
/// Implementation of GPU interface that runs the GPU asynchronously
class GPUAsynch final : public Tegra::GPU {
public:
- explicit GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
- std::unique_ptr<Core::Frontend::GraphicsContext>&& context);
+ explicit GPUAsynch(Core::System& system);
~GPUAsynch() override;
void Start() override;
@@ -42,7 +41,6 @@ protected:
private:
GPUThread::ThreadManager gpu_thread;
std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
- std::unique_ptr<Core::Frontend::GraphicsContext> gpu_context;
};
} // namespace VideoCommon
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index aaeb9811d..1ca47ddef 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -7,20 +7,18 @@
namespace VideoCommon {
-GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
- std::unique_ptr<Core::Frontend::GraphicsContext>&& context)
- : GPU(system, std::move(renderer), false), context{std::move(context)} {}
+GPUSynch::GPUSynch(Core::System& system) : GPU{system, false} {}
GPUSynch::~GPUSynch() = default;
void GPUSynch::Start() {}
void GPUSynch::ObtainContext() {
- context->MakeCurrent();
+ renderer->Context().MakeCurrent();
}
void GPUSynch::ReleaseContext() {
- context->DoneCurrent();
+ renderer->Context().DoneCurrent();
}
void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 762c20aa5..297258cb1 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -19,8 +19,7 @@ namespace VideoCommon {
/// Implementation of GPU interface that runs the GPU synchronously
class GPUSynch final : public Tegra::GPU {
public:
- explicit GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
- std::unique_ptr<Core::Frontend::GraphicsContext>&& context);
+ explicit GPUSynch(Core::System& system);
~GPUSynch() override;
void Start() override;
@@ -36,9 +35,6 @@ public:
protected:
void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
[[maybe_unused]] u32 value) const override {}
-
-private:
- std::unique_ptr<Core::Frontend::GraphicsContext> context;
};
} // namespace VideoCommon
diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt
new file mode 100644
index 000000000..aa62363a7
--- /dev/null
+++ b/src/video_core/host_shaders/CMakeLists.txt
@@ -0,0 +1,43 @@
+set(SHADER_FILES
+ opengl_present.frag
+ opengl_present.vert
+)
+
+set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include)
+set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE)
+
+set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders)
+add_custom_command(
+ OUTPUT
+ ${SHADER_DIR}
+ COMMAND
+ ${CMAKE_COMMAND} -E make_directory ${SHADER_DIR}
+)
+
+set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in)
+set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake)
+
+foreach(FILENAME IN ITEMS ${SHADER_FILES})
+ string(REPLACE "." "_" SHADER_NAME ${FILENAME})
+ set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME})
+ set(HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h)
+ add_custom_command(
+ OUTPUT
+ ${HEADER_FILE}
+ COMMAND
+ ${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${HEADER_FILE} ${INPUT_FILE}
+ MAIN_DEPENDENCY
+ ${SOURCE_FILE}
+ DEPENDS
+ ${HEADER_GENERATOR}
+ ${INPUT_FILE}
+ )
+ set(SHADER_HEADERS ${SHADER_HEADERS} ${HEADER_FILE})
+endforeach()
+
+add_custom_target(host_shaders
+ DEPENDS
+ ${SHADER_HEADERS}
+ SOURCES
+ ${SHADER_FILES}
+)
diff --git a/src/video_core/host_shaders/StringShaderHeader.cmake b/src/video_core/host_shaders/StringShaderHeader.cmake
new file mode 100644
index 000000000..368bce0ed
--- /dev/null
+++ b/src/video_core/host_shaders/StringShaderHeader.cmake
@@ -0,0 +1,11 @@
+set(SOURCE_FILE ${CMAKE_ARGV3})
+set(HEADER_FILE ${CMAKE_ARGV4})
+set(INPUT_FILE ${CMAKE_ARGV5})
+
+get_filename_component(CONTENTS_NAME ${SOURCE_FILE} NAME)
+string(REPLACE "." "_" CONTENTS_NAME ${CONTENTS_NAME})
+string(TOUPPER ${CONTENTS_NAME} CONTENTS_NAME)
+
+file(READ ${SOURCE_FILE} CONTENTS)
+
+configure_file(${INPUT_FILE} ${HEADER_FILE} @ONLY)
diff --git a/src/video_core/host_shaders/opengl_present.frag b/src/video_core/host_shaders/opengl_present.frag
new file mode 100644
index 000000000..8a4cb024b
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_present.frag
@@ -0,0 +1,10 @@
+#version 430 core
+
+layout (location = 0) in vec2 frag_tex_coord;
+layout (location = 0) out vec4 color;
+
+layout (binding = 0) uniform sampler2D color_texture;
+
+void main() {
+ color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f);
+}
diff --git a/src/video_core/host_shaders/opengl_present.vert b/src/video_core/host_shaders/opengl_present.vert
new file mode 100644
index 000000000..2235d31a4
--- /dev/null
+++ b/src/video_core/host_shaders/opengl_present.vert
@@ -0,0 +1,24 @@
+#version 430 core
+
+out gl_PerVertex {
+ vec4 gl_Position;
+};
+
+layout (location = 0) in vec2 vert_position;
+layout (location = 1) in vec2 vert_tex_coord;
+layout (location = 0) out vec2 frag_tex_coord;
+
+// This is a truncated 3x3 matrix for 2D transformations:
+// The upper-left 2x2 submatrix performs scaling/rotation/mirroring.
+// The third column performs translation.
+// The third row could be used for projection, which we don't need in 2D. It hence is assumed to
+// implicitly be [0, 0, 1]
+layout (location = 0) uniform mat3x2 modelview_matrix;
+
+void main() {
+ // Multiply input position by the rotscale part of the matrix and then manually translate by
+ // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector
+ // to `vec3(vert_position.xy, 1.0)`
+ gl_Position = vec4(mat2(modelview_matrix) * vert_position + modelview_matrix[2], 0.0, 1.0);
+ frag_tex_coord = vert_tex_coord;
+}
diff --git a/src/video_core/host_shaders/source_shader.h.in b/src/video_core/host_shaders/source_shader.h.in
new file mode 100644
index 000000000..ccdb0d2a9
--- /dev/null
+++ b/src/video_core/host_shaders/source_shader.h.in
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <string_view>
+
+namespace HostShaders {
+
+constexpr std::string_view @CONTENTS_NAME@ = R"(@CONTENTS@)";
+
+} // namespace HostShaders
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index a50e7b4e0..cd21a2112 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -36,7 +36,7 @@ void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method,
}
} else {
// Macro not compiled, check if it's uploaded and if so, compile it
- std::optional<u32> mid_method = std::nullopt;
+ std::optional<u32> mid_method;
const auto macro_code = uploaded_macro_code.find(method);
if (macro_code == uploaded_macro_code.end()) {
for (const auto& [method_base, code] : uploaded_macro_code) {
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index 0c9ff59a4..df00b57df 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -24,7 +24,7 @@ void HLE_771BB18C62444DA0(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
maxwell3d.regs.index_array.first = parameters[4];
if (maxwell3d.ShouldExecute()) {
- maxwell3d.GetRasterizer().Draw(true, true);
+ maxwell3d.Rasterizer().Draw(true, true);
}
maxwell3d.regs.index_array.count = 0;
maxwell3d.mme_draw.instance_count = 0;
@@ -42,7 +42,7 @@ void HLE_0D61FC9FAAC9FCAD(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
maxwell3d.mme_draw.instance_count = count;
if (maxwell3d.ShouldExecute()) {
- maxwell3d.GetRasterizer().Draw(false, true);
+ maxwell3d.Rasterizer().Draw(false, true);
}
maxwell3d.regs.vertex_buffer.count = 0;
maxwell3d.mme_draw.instance_count = 0;
@@ -65,7 +65,7 @@ void HLE_0217920100488FF7(Engines::Maxwell3D& maxwell3d, const std::vector<u32>&
maxwell3d.regs.draw.topology.Assign(
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]));
if (maxwell3d.ShouldExecute()) {
- maxwell3d.GetRasterizer().Draw(true, true);
+ maxwell3d.Rasterizer().Draw(true, true);
}
maxwell3d.regs.reg_array[0x446] = 0x0; // vertex id base?
maxwell3d.regs.index_array.count = 0;
diff --git a/src/video_core/macro/macro_interpreter.cpp b/src/video_core/macro/macro_interpreter.cpp
index aa5256419..bd01fd1f2 100644
--- a/src/video_core/macro/macro_interpreter.cpp
+++ b/src/video_core/macro/macro_interpreter.cpp
@@ -34,7 +34,6 @@ void MacroInterpreterImpl::Execute(const std::vector<u32>& parameters, u32 metho
this->parameters = std::make_unique<u32[]>(num_parameters);
}
std::memcpy(this->parameters.get(), parameters.data(), num_parameters * sizeof(u32));
- this->num_parameters = num_parameters;
// Execute the code until we hit an exit condition.
bool keep_executing = true;
diff --git a/src/video_core/macro/macro_jit_x64.cpp b/src/video_core/macro/macro_jit_x64.cpp
index c1b9e4ad9..954b87515 100644
--- a/src/video_core/macro/macro_jit_x64.cpp
+++ b/src/video_core/macro/macro_jit_x64.cpp
@@ -14,11 +14,11 @@ MICROPROFILE_DEFINE(MacroJitCompile, "GPU", "Compile macro JIT", MP_RGB(173, 255
MICROPROFILE_DEFINE(MacroJitExecute, "GPU", "Execute macro JIT", MP_RGB(255, 255, 0));
namespace Tegra {
-static const Xbyak::Reg64 STATE = Xbyak::util::rbx;
-static const Xbyak::Reg32 RESULT = Xbyak::util::ebp;
-static const Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
-static const Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
-static const Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
+constexpr Xbyak::Reg64 STATE = Xbyak::util::rbx;
+constexpr Xbyak::Reg32 RESULT = Xbyak::util::ebp;
+constexpr Xbyak::Reg64 PARAMETERS = Xbyak::util::r12;
+constexpr Xbyak::Reg32 METHOD_ADDRESS = Xbyak::util::r14d;
+constexpr Xbyak::Reg64 BRANCH_HOLDER = Xbyak::util::r15;
static const std::bitset<32> PERSISTENT_REGISTERS = Common::X64::BuildRegSet({
STATE,
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 844164645..02cf53d15 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -14,11 +14,15 @@
namespace Tegra {
-MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
- : system{system}, rasterizer{rasterizer}, page_table(page_table_size) {}
+MemoryManager::MemoryManager(Core::System& system_)
+ : system{system_}, page_table(page_table_size) {}
MemoryManager::~MemoryManager() = default;
+void MemoryManager::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
+ rasterizer = &rasterizer_;
+}
+
GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size) {
u64 remaining_size{size};
for (u64 offset{}; offset < size; offset += page_size) {
@@ -54,7 +58,7 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
std::optional<GPUVAddr> MemoryManager::AllocateFixed(GPUVAddr gpu_addr, std::size_t size) {
for (u64 offset{}; offset < size; offset += page_size) {
if (!GetPageEntry(gpu_addr + offset).IsUnmapped()) {
- return {};
+ return std::nullopt;
}
}
@@ -131,13 +135,13 @@ std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size
}
}
- return {};
+ return std::nullopt;
}
std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
const auto page_entry{GetPageEntry(gpu_addr)};
if (!page_entry.IsValid()) {
- return {};
+ return std::nullopt;
}
return page_entry.ToAddress() + (gpu_addr & page_mask);
@@ -217,7 +221,7 @@ void MemoryManager::ReadBlock(GPUVAddr gpu_src_addr, void* dest_buffer, std::siz
// Flush must happen on the rasterizer interface, such that memory is always synchronous
// when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu.
- rasterizer.FlushRegion(src_addr, copy_amount);
+ rasterizer->FlushRegion(src_addr, copy_amount);
system.Memory().ReadBlockUnsafe(src_addr, dest_buffer, copy_amount);
}
@@ -266,7 +270,7 @@ void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, s
// Invalidate must happen on the rasterizer interface, such that memory is always
// synchronous when it is written (even when in asynchronous GPU mode).
- rasterizer.InvalidateRegion(dest_addr, copy_amount);
+ rasterizer->InvalidateRegion(dest_addr, copy_amount);
system.Memory().WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
}
@@ -312,10 +316,10 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_add
WriteBlockUnsafe(gpu_dest_addr, tmp_buffer.data(), size);
}
-bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) {
+bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
const auto cpu_addr{GpuToCpuAddress(gpu_addr)};
if (!cpu_addr) {
- return {};
+ return false;
}
const std::size_t page{(*cpu_addr & Core::Memory::PAGE_MASK) + size};
return page <= Core::Memory::PAGE_SIZE;
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 681bd9588..53c8d122a 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -31,19 +31,19 @@ public:
constexpr PageEntry(State state) : state{state} {}
constexpr PageEntry(VAddr addr) : state{static_cast<State>(addr >> ShiftBits)} {}
- constexpr bool IsUnmapped() const {
+ [[nodiscard]] constexpr bool IsUnmapped() const {
return state == State::Unmapped;
}
- constexpr bool IsAllocated() const {
+ [[nodiscard]] constexpr bool IsAllocated() const {
return state == State::Allocated;
}
- constexpr bool IsValid() const {
+ [[nodiscard]] constexpr bool IsValid() const {
return !IsUnmapped() && !IsAllocated();
}
- constexpr VAddr ToAddress() const {
+ [[nodiscard]] constexpr VAddr ToAddress() const {
if (!IsValid()) {
return {};
}
@@ -51,7 +51,7 @@ public:
return static_cast<VAddr>(state) << ShiftBits;
}
- constexpr PageEntry operator+(u64 offset) {
+ [[nodiscard]] constexpr PageEntry operator+(u64 offset) const {
// If this is a reserved value, offsets do not apply
if (!IsValid()) {
return *this;
@@ -68,19 +68,22 @@ static_assert(sizeof(PageEntry) == 4, "PageEntry is too large");
class MemoryManager final {
public:
- explicit MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer);
+ explicit MemoryManager(Core::System& system);
~MemoryManager();
- std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
+ /// Binds a renderer to the memory manager.
+ void BindRasterizer(VideoCore::RasterizerInterface& rasterizer);
+
+ [[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
template <typename T>
- T Read(GPUVAddr addr) const;
+ [[nodiscard]] T Read(GPUVAddr addr) const;
template <typename T>
void Write(GPUVAddr addr, T data);
- u8* GetPointer(GPUVAddr addr);
- const u8* GetPointer(GPUVAddr addr) const;
+ [[nodiscard]] u8* GetPointer(GPUVAddr addr);
+ [[nodiscard]] const u8* GetPointer(GPUVAddr addr) const;
/**
* ReadBlock and WriteBlock are full read and write operations over virtual
@@ -109,24 +112,24 @@ public:
/**
* IsGranularRange checks if a gpu region can be simply read with a pointer.
*/
- bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size);
+ [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const;
- GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
- GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
- std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size);
- GPUVAddr Allocate(std::size_t size, std::size_t align);
+ [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
+ [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
+ [[nodiscard]] std::optional<GPUVAddr> AllocateFixed(GPUVAddr gpu_addr, std::size_t size);
+ [[nodiscard]] GPUVAddr Allocate(std::size_t size, std::size_t align);
void Unmap(GPUVAddr gpu_addr, std::size_t size);
private:
- PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
+ [[nodiscard]] PageEntry GetPageEntry(GPUVAddr gpu_addr) const;
void SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size = page_size);
GPUVAddr UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std::size_t size);
- std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align) const;
+ [[nodiscard]] std::optional<GPUVAddr> FindFreeRange(std::size_t size, std::size_t align) const;
void TryLockPage(PageEntry page_entry, std::size_t size);
void TryUnlockPage(PageEntry page_entry, std::size_t size);
- static constexpr std::size_t PageEntryIndex(GPUVAddr gpu_addr) {
+ [[nodiscard]] static constexpr std::size_t PageEntryIndex(GPUVAddr gpu_addr) {
return (gpu_addr >> page_bits) & page_table_mask;
}
@@ -141,7 +144,7 @@ private:
Core::System& system;
- VideoCore::RasterizerInterface& rasterizer;
+ VideoCore::RasterizerInterface* rasterizer = nullptr;
std::vector<PageEntry> page_table;
};
diff --git a/src/video_core/query_cache.h b/src/video_core/query_cache.h
index 0d3a88765..fc54ca0ef 100644
--- a/src/video_core/query_cache.h
+++ b/src/video_core/query_cache.h
@@ -91,14 +91,15 @@ private:
std::shared_ptr<HostCounter> last;
};
-template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter,
- class QueryPool>
+template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
class QueryCacheBase {
public:
- explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
- : system{system}, rasterizer{rasterizer}, streams{{CounterStream{
- static_cast<QueryCache&>(*this),
- VideoCore::QueryType::SamplesPassed}}} {}
+ explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::MemoryManager& gpu_memory_)
+ : rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
+ gpu_memory{gpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
+ VideoCore::QueryType::SamplesPassed}}} {}
void InvalidateRegion(VAddr addr, std::size_t size) {
std::unique_lock lock{mutex};
@@ -118,29 +119,27 @@ public:
*/
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
std::unique_lock lock{mutex};
- auto& memory_manager = system.GPU().MemoryManager();
- const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
- ASSERT(cpu_addr_opt);
- VAddr cpu_addr = *cpu_addr_opt;
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+ ASSERT(cpu_addr);
- CachedQuery* query = TryGet(cpu_addr);
+ CachedQuery* query = TryGet(*cpu_addr);
if (!query) {
- ASSERT_OR_EXECUTE(cpu_addr_opt, return;);
- const auto host_ptr = memory_manager.GetPointer(gpu_addr);
+ ASSERT_OR_EXECUTE(cpu_addr, return;);
+ u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
- query = Register(type, cpu_addr, host_ptr, timestamp.has_value());
+ query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
}
query->BindCounter(Stream(type).Current(), timestamp);
if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
- AsyncFlushQuery(cpu_addr);
+ AsyncFlushQuery(*cpu_addr);
}
}
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
void UpdateCounters() {
std::unique_lock lock{mutex};
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
}
@@ -206,9 +205,6 @@ public:
committed_flushes.pop_front();
}
-protected:
- std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
-
private:
/// Flushes a memory range to guest memory and removes it from the cache.
void FlushAndRemoveRegion(VAddr addr, std::size_t size) {
@@ -270,8 +266,9 @@ private:
static constexpr std::uintptr_t PAGE_SIZE = 4096;
static constexpr unsigned PAGE_BITS = 12;
- Core::System& system;
VideoCore::RasterizerInterface& rasterizer;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::MemoryManager& gpu_memory;
std::recursive_mutex mutex;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 3cbdac8e7..b3e0919f8 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -106,11 +106,8 @@ public:
virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
/// Initialize disk cached resources for the game being emulated
- virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
- const DiskResourceLoadCallback& callback = {}) {}
-
- /// Initializes renderer dirty flags
- virtual void SetupDirtyFlags() {}
+ virtual void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
+ const DiskResourceLoadCallback& callback) {}
/// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
GuestDriverProfile& AccessGuestDriverProfile() {
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index dfb06e87e..a93a1732c 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -9,7 +9,9 @@
namespace VideoCore {
-RendererBase::RendererBase(Core::Frontend::EmuWindow& window) : render_window{window} {
+RendererBase::RendererBase(Core::Frontend::EmuWindow& window_,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context_)
+ : render_window{window_}, context{std::move(context_)} {
RefreshBaseSettings();
}
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 1d85219b6..5c650808b 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -15,7 +15,8 @@
namespace Core::Frontend {
class EmuWindow;
-}
+class GraphicsContext;
+} // namespace Core::Frontend
namespace VideoCore {
@@ -25,14 +26,15 @@ struct RendererSettings {
// Screenshot
std::atomic<bool> screenshot_requested{false};
- void* screenshot_bits;
+ void* screenshot_bits{};
std::function<void()> screenshot_complete_callback;
Layout::FramebufferLayout screenshot_framebuffer_layout;
};
class RendererBase : NonCopyable {
public:
- explicit RendererBase(Core::Frontend::EmuWindow& window);
+ explicit RendererBase(Core::Frontend::EmuWindow& window,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context);
virtual ~RendererBase();
/// Initialize the renderer
@@ -44,11 +46,6 @@ public:
/// Finalize rendering the guest frame and draw into the presentation texture
virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
- /// Draws the latest frame to the window waiting timeout_ms for a frame to arrive (Renderer
- /// specific implementation)
- /// Returns true if a frame was drawn
- virtual bool TryPresent(int timeout_ms) = 0;
-
// Getter/setter functions:
// ------------------------
@@ -68,6 +65,14 @@ public:
return *rasterizer;
}
+ Core::Frontend::GraphicsContext& Context() {
+ return *context;
+ }
+
+ const Core::Frontend::GraphicsContext& Context() const {
+ return *context;
+ }
+
Core::Frontend::EmuWindow& GetRenderWindow() {
return render_window;
}
@@ -94,6 +99,7 @@ public:
protected:
Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle.
std::unique_ptr<RasterizerInterface> rasterizer;
+ std::unique_ptr<Core::Frontend::GraphicsContext> context;
f32 m_current_fps = 0.0f; ///< Current framerate, should be set by the renderer
int m_current_frame = 0; ///< Current frame, should be set by the renderer
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index e866d8f2f..b1c4cd62f 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -59,9 +59,10 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
}
-OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
+OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
const Device& device_, std::size_t stream_size)
- : GenericBufferCache{rasterizer, system,
+ : GenericBufferCache{rasterizer, gpu_memory, cpu_memory,
std::make_unique<OGLStreamBuffer>(device_, stream_size, true)},
device{device_} {
if (!device.HasFastBufferSubData()) {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 88fdc0536..f75b32e31 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -52,7 +52,8 @@ private:
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
class OGLBufferCache final : public GenericBufferCache {
public:
- explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
+ explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
const Device& device, std::size_t stream_size);
~OGLBufferCache();
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index e7d95149f..a94e4f72e 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -193,7 +193,6 @@ bool IsASTCSupported() {
Device::Device()
: max_uniform_buffers{BuildMaxUniformBuffers()}, base_bindings{BuildBaseBindings()} {
const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
- const std::string_view renderer = reinterpret_cast<const char*>(glGetString(GL_RENDERER));
const std::string_view version = reinterpret_cast<const char*>(glGetString(GL_VERSION));
const std::vector extensions = GetExtensions();
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index ec5421afa..b532fdcc2 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -4,16 +4,17 @@
#include "common/assert.h"
+#include <glad/glad.h>
+
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_fence_manager.h"
namespace OpenGL {
-GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed)
- : VideoCommon::FenceBase(payload, is_stubbed), sync_object{} {}
+GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed) : FenceBase(payload, is_stubbed) {}
GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed)
- : VideoCommon::FenceBase(address, payload, is_stubbed), sync_object{} {}
+ : FenceBase(address, payload, is_stubbed) {}
GLInnerFence::~GLInnerFence() = default;
@@ -44,11 +45,10 @@ void GLInnerFence::Wait() {
glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED);
}
-FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system,
- VideoCore::RasterizerInterface& rasterizer,
+FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
TextureCacheOpenGL& texture_cache,
OGLBufferCache& buffer_cache, QueryCache& query_cache)
- : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache) {}
+ : GenericFenceManager{rasterizer, gpu, texture_cache, buffer_cache, query_cache} {}
Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<GLInnerFence>(value, is_stubbed);
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.h b/src/video_core/renderer_opengl/gl_fence_manager.h
index c917b3343..da1dcdace 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.h
+++ b/src/video_core/renderer_opengl/gl_fence_manager.h
@@ -5,7 +5,6 @@
#pragma once
#include <memory>
-#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/fence_manager.h"
@@ -38,9 +37,9 @@ using GenericFenceManager =
class FenceManagerOpenGL final : public GenericFenceManager {
public:
- FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache,
- QueryCache& query_cache);
+ explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+ TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache,
+ QueryCache& query_cache);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp
index d7ba57aca..1a3d9720e 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_query_cache.cpp
@@ -30,12 +30,11 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
} // Anonymous namespace
-QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
- : VideoCommon::QueryCacheBase<
- QueryCache, CachedQuery, CounterStream, HostCounter,
- std::vector<OGLQuery>>{system,
- static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)},
- gl_rasterizer{gl_rasterizer} {}
+QueryCache::QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::MemoryManager& gpu_memory)
+ : VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter>(
+ rasterizer, maxwell3d, gpu_memory),
+ gl_rasterizer{rasterizer} {}
QueryCache::~QueryCache() = default;
@@ -90,6 +89,8 @@ u64 HostCounter::BlockingQuery() const {
CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr)
: VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {}
+CachedQuery::~CachedQuery() = default;
+
CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
: VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
diff --git a/src/video_core/renderer_opengl/gl_query_cache.h b/src/video_core/renderer_opengl/gl_query_cache.h
index d8e7052a1..82cac51ee 100644
--- a/src/video_core/renderer_opengl/gl_query_cache.h
+++ b/src/video_core/renderer_opengl/gl_query_cache.h
@@ -26,10 +26,11 @@ class RasterizerOpenGL;
using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
-class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
- HostCounter, std::vector<OGLQuery>> {
+class QueryCache final
+ : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
public:
- explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
+ explicit QueryCache(RasterizerOpenGL& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::MemoryManager& gpu_memory);
~QueryCache();
OGLQuery AllocateQuery(VideoCore::QueryType type);
@@ -40,6 +41,7 @@ public:
private:
RasterizerOpenGL& gl_rasterizer;
+ std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> query_pools;
};
class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
@@ -62,10 +64,12 @@ class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
public:
explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr,
u8* host_ptr);
- CachedQuery(CachedQuery&& rhs) noexcept;
- CachedQuery(const CachedQuery&) = delete;
+ ~CachedQuery() override;
+ CachedQuery(CachedQuery&& rhs) noexcept;
CachedQuery& operator=(CachedQuery&& rhs) noexcept;
+
+ CachedQuery(const CachedQuery&) = delete;
CachedQuery& operator=(const CachedQuery&) = delete;
void Flush() override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 4af5824cd..bbb2eb17c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -153,16 +153,19 @@ void UpdateBindlessPointers(GLenum target, GLuint64EXT* pointers, std::size_t nu
} // Anonymous namespace
-RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
- const Device& device, ScreenInfo& info,
- ProgramManager& program_manager, StateTracker& state_tracker)
- : RasterizerAccelerated{system.Memory()}, device{device}, texture_cache{system, *this, device,
- state_tracker},
- shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
- buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
- fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
- screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker},
- async_shaders{emu_window} {
+RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_,
+ Core::Memory::Memory& cpu_memory, const Device& device_,
+ ScreenInfo& screen_info_, ProgramManager& program_manager_,
+ StateTracker& state_tracker_)
+ : RasterizerAccelerated{cpu_memory}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
+ kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
+ screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
+ texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker),
+ shader_cache(*this, emu_window, gpu, maxwell3d, kepler_compute, gpu_memory, device),
+ query_cache(*this, maxwell3d, gpu_memory),
+ buffer_cache(*this, gpu_memory, cpu_memory, device, STREAM_BUFFER_SIZE),
+ fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
+ async_shaders(emu_window) {
CheckExtensions();
unified_uniform_buffer.Create();
@@ -196,8 +199,7 @@ void RasterizerOpenGL::CheckExtensions() {
}
void RasterizerOpenGL::SetupVertexFormat() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexFormats]) {
return;
}
@@ -217,7 +219,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
}
flags[Dirty::VertexFormat0 + index] = false;
- const auto attrib = gpu.regs.vertex_attrib_format[index];
+ const auto attrib = maxwell3d.regs.vertex_attrib_format[index];
const auto gl_index = static_cast<GLuint>(index);
// Disable constant attributes.
@@ -241,8 +243,7 @@ void RasterizerOpenGL::SetupVertexFormat() {
}
void RasterizerOpenGL::SetupVertexBuffer() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexBuffers]) {
return;
}
@@ -253,7 +254,7 @@ void RasterizerOpenGL::SetupVertexBuffer() {
const bool use_unified_memory = device.HasVertexBufferUnifiedMemory();
// Upload all guest vertex arrays sequentially to our buffer
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_BINDINGS; ++index) {
if (!flags[Dirty::VertexBuffer0 + index]) {
continue;
@@ -290,14 +291,13 @@ void RasterizerOpenGL::SetupVertexBuffer() {
}
void RasterizerOpenGL::SetupVertexInstances() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::VertexInstances]) {
return;
}
flags[Dirty::VertexInstances] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
if (!flags[Dirty::VertexInstance0 + index]) {
continue;
@@ -313,7 +313,7 @@ void RasterizerOpenGL::SetupVertexInstances() {
GLintptr RasterizerOpenGL::SetupIndexBuffer() {
MICROPROFILE_SCOPE(OpenGL_Index);
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
const std::size_t size = CalculateIndexBufferSize();
const auto info = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, info.handle);
@@ -322,15 +322,14 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
MICROPROFILE_SCOPE(OpenGL_Shader);
- auto& gpu = system.GPU().Maxwell3D();
u32 clip_distances = 0;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
- const auto& shader_config = gpu.regs.shader_config[index];
+ const auto& shader_config = maxwell3d.regs.shader_config[index];
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
// Skip stages that are not enabled
- if (!gpu.regs.IsShaderConfigEnabled(index)) {
+ if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
switch (program) {
case Maxwell::ShaderProgram::Geometry:
program_manager.UseGeometryShader(0);
@@ -391,11 +390,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
}
SyncClipEnabled(clip_distances);
- gpu.dirty.flags[Dirty::Shaders] = false;
+ maxwell3d.dirty.flags[Dirty::Shaders] = false;
}
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
std::size_t size = 0;
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
@@ -413,34 +412,27 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
}
std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
- const auto& regs = system.GPU().Maxwell3D().regs;
-
- return static_cast<std::size_t>(regs.index_array.count) *
- static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
+ return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
+ static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
}
-void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
+void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
- shader_cache.LoadDiskCache(stop_loading, callback);
-}
-
-void RasterizerOpenGL::SetupDirtyFlags() {
- state_tracker.Initialize();
+ shader_cache.LoadDiskCache(title_id, stop_loading, callback);
}
void RasterizerOpenGL::ConfigureFramebuffers() {
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
- auto& gpu = system.GPU().Maxwell3D();
- if (!gpu.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
+ if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
return;
}
- gpu.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
+ maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
texture_cache.GuardRenderTargets(true);
View depth_surface = texture_cache.GetDepthBufferSurface(true);
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
// Bind the framebuffer surfaces
@@ -472,8 +464,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
}
void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) {
- auto& gpu = system.GPU().Maxwell3D();
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
texture_cache.GuardRenderTargets(true);
View color_surface;
@@ -523,12 +514,11 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_de
}
void RasterizerOpenGL::Clear() {
- const auto& gpu = system.GPU().Maxwell3D();
- if (!gpu.ShouldExecute()) {
+ if (!maxwell3d.ShouldExecute()) {
return;
}
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
bool use_color{};
bool use_depth{};
bool use_stencil{};
@@ -593,7 +583,6 @@ void RasterizerOpenGL::Clear() {
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
- auto& gpu = system.GPU().Maxwell3D();
query_cache.UpdateCounters();
@@ -641,7 +630,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
if (invalidated) {
// When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
- auto& dirty = gpu.dirty.flags;
+ auto& dirty = maxwell3d.dirty.flags;
dirty[Dirty::VertexBuffers] = true;
for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
dirty[index] = true;
@@ -662,7 +651,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
// Setup emulation uniform buffer.
if (!device.UseAssemblyShaders()) {
MaxwellUniformData ubo;
- ubo.SetFromRegs(gpu);
+ ubo.SetFromRegs(maxwell3d);
const auto info =
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, info.handle, info.offset,
@@ -671,7 +660,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
// Setup shaders and their used resources.
texture_cache.GuardSamplers(true);
- const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(gpu.regs.draw.topology);
+ const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
SetupShaders(primitive_mode);
texture_cache.GuardSamplers(false);
@@ -688,14 +677,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
BeginTransformFeedback(primitive_mode);
- const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
+ const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
const GLsizei num_instances =
- static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1);
+ static_cast<GLsizei>(is_instanced ? maxwell3d.mme_draw.instance_count : 1);
if (is_indexed) {
- const GLint base_vertex = static_cast<GLint>(gpu.regs.vb_element_base);
- const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.index_array.count);
+ const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vb_element_base);
+ const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.index_array.count);
const GLvoid* offset = reinterpret_cast<const GLvoid*>(index_buffer_offset);
- const GLenum format = MaxwellToGL::IndexFormat(gpu.regs.index_array.format);
+ const GLenum format = MaxwellToGL::IndexFormat(maxwell3d.regs.index_array.format);
if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
glDrawElements(primitive_mode, num_vertices, format, offset);
} else if (num_instances == 1 && base_instance == 0) {
@@ -714,8 +703,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
base_instance);
}
} else {
- const GLint base_vertex = static_cast<GLint>(gpu.regs.vertex_buffer.first);
- const GLsizei num_vertices = static_cast<GLsizei>(gpu.regs.vertex_buffer.count);
+ const GLint base_vertex = static_cast<GLint>(maxwell3d.regs.vertex_buffer.first);
+ const GLsizei num_vertices = static_cast<GLsizei>(maxwell3d.regs.vertex_buffer.count);
if (num_instances == 1 && base_instance == 0) {
glDrawArrays(primitive_mode, base_vertex, num_vertices);
} else if (base_instance == 0) {
@@ -730,7 +719,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
++num_queued_commands;
- system.GPU().TickWork();
+ gpu.TickWork();
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -753,7 +742,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
buffer_cache.Unmap();
- const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ const auto& launch_desc = kepler_compute.launch_description;
+ program_manager.BindCompute(kernel->GetHandle());
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
++num_queued_commands;
}
@@ -815,17 +805,14 @@ void RasterizerOpenGL::SyncGuestHost() {
}
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
- auto& memory_manager{gpu.MemoryManager()};
- memory_manager.Write<u32>(addr, value);
+ gpu_memory.Write<u32>(addr, value);
return;
}
fence_manager.SignalSemaphore(addr, value);
}
void RasterizerOpenGL::SignalSyncPoint(u32 value) {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
gpu.IncrementSyncPoint(value);
return;
@@ -834,7 +821,6 @@ void RasterizerOpenGL::SignalSyncPoint(u32 value) {
}
void RasterizerOpenGL::ReleaseFences() {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
return;
}
@@ -920,7 +906,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* sh
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV};
MICROPROFILE_SCOPE(OpenGL_UBO);
- const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
+ const auto& stages = maxwell3d.state.shader_stages;
const auto& shader_stage = stages[stage_index];
const auto& entries = shader->GetEntries();
const bool use_unified = entries.use_unified_uniforms;
@@ -945,7 +931,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* sh
void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
MICROPROFILE_SCOPE(OpenGL_UBO);
- const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ const auto& launch_desc = kepler_compute.launch_description;
const auto& entries = kernel->GetEntries();
const bool use_unified = entries.use_unified_uniforms;
@@ -1018,9 +1004,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
};
- auto& gpu{system.GPU()};
- auto& memory_manager{gpu.MemoryManager()};
- const auto& cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
+ const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
const auto& entries{shader->GetEntries().global_memory_entries};
std::array<GLuint64EXT, 32> pointers;
@@ -1030,8 +1014,8 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
u32 binding = assembly_shaders ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
for (const auto& entry : entries) {
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
- const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};
- const u32 size{memory_manager.Read<u32>(addr + 8)};
+ const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
+ const u32 size{gpu_memory.Read<u32>(addr + 8)};
SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
++binding;
}
@@ -1041,9 +1025,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
}
void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
- auto& gpu{system.GPU()};
- auto& memory_manager{gpu.MemoryManager()};
- const auto& cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
+ const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
const auto& entries{kernel->GetEntries().global_memory_entries};
std::array<GLuint64EXT, 32> pointers;
@@ -1052,8 +1034,8 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
u32 binding = 0;
for (const auto& entry : entries) {
const GPUVAddr addr{cbufs[entry.cbuf_index].Address() + entry.cbuf_offset};
- const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};
- const u32 size{memory_manager.Read<u32>(addr + 8)};
+ const GPUVAddr gpu_addr{gpu_memory.Read<u64>(addr)};
+ const u32 size{gpu_memory.Read<u32>(addr + 8)};
SetupGlobalMemory(binding, entry, gpu_addr, size, &pointers[binding]);
++binding;
}
@@ -1077,7 +1059,6 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
MICROPROFILE_SCOPE(OpenGL_Texture);
- const auto& maxwell3d = system.GPU().Maxwell3D();
u32 binding = device.GetBaseBindings(stage_index).sampler;
for (const auto& entry : shader->GetEntries().samplers) {
const auto shader_type = static_cast<ShaderType>(stage_index);
@@ -1090,11 +1071,10 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader
void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
MICROPROFILE_SCOPE(OpenGL_Texture);
- const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0;
for (const auto& entry : kernel->GetEntries().samplers) {
for (std::size_t i = 0; i < entry.size; ++i) {
- const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i);
+ const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i);
SetupTexture(binding++, texture, entry);
}
}
@@ -1118,20 +1098,18 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
}
void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
- const auto& maxwell3d = system.GPU().Maxwell3D();
u32 binding = device.GetBaseBindings(stage_index).image;
for (const auto& entry : shader->GetEntries().images) {
- const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
+ const auto shader_type = static_cast<ShaderType>(stage_index);
const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
SetupImage(binding++, tic, entry);
}
}
void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
- const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0;
for (const auto& entry : shader->GetEntries().images) {
- const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic;
+ const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic;
SetupImage(binding++, tic, entry);
}
}
@@ -1151,9 +1129,8 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t
}
void RasterizerOpenGL::SyncViewport() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
- const auto& regs = gpu.regs;
+ auto& flags = maxwell3d.dirty.flags;
+ const auto& regs = maxwell3d.regs;
const bool dirty_viewport = flags[Dirty::Viewports];
const bool dirty_clip_control = flags[Dirty::ClipControl];
@@ -1225,25 +1202,23 @@ void RasterizerOpenGL::SyncViewport() {
}
void RasterizerOpenGL::SyncDepthClamp() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::DepthClampEnabled]) {
return;
}
flags[Dirty::DepthClampEnabled] = false;
- oglEnable(GL_DEPTH_CLAMP, gpu.regs.view_volume_clip_control.depth_clamp_disabled == 0);
+ oglEnable(GL_DEPTH_CLAMP, maxwell3d.regs.view_volume_clip_control.depth_clamp_disabled == 0);
}
void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) {
return;
}
flags[Dirty::ClipDistances] = false;
- clip_mask &= gpu.regs.clip_distance_enabled;
+ clip_mask &= maxwell3d.regs.clip_distance_enabled;
if (clip_mask == last_clip_distance_mask) {
return;
}
@@ -1259,9 +1234,8 @@ void RasterizerOpenGL::SyncClipCoef() {
}
void RasterizerOpenGL::SyncCullMode() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
- const auto& regs = gpu.regs;
+ auto& flags = maxwell3d.dirty.flags;
+ const auto& regs = maxwell3d.regs;
if (flags[Dirty::CullTest]) {
flags[Dirty::CullTest] = false;
@@ -1276,26 +1250,24 @@ void RasterizerOpenGL::SyncCullMode() {
}
void RasterizerOpenGL::SyncPrimitiveRestart() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::PrimitiveRestart]) {
return;
}
flags[Dirty::PrimitiveRestart] = false;
- if (gpu.regs.primitive_restart.enabled) {
+ if (maxwell3d.regs.primitive_restart.enabled) {
glEnable(GL_PRIMITIVE_RESTART);
- glPrimitiveRestartIndex(gpu.regs.primitive_restart.index);
+ glPrimitiveRestartIndex(maxwell3d.regs.primitive_restart.index);
} else {
glDisable(GL_PRIMITIVE_RESTART);
}
}
void RasterizerOpenGL::SyncDepthTestState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
+ const auto& regs = maxwell3d.regs;
- const auto& regs = gpu.regs;
if (flags[Dirty::DepthMask]) {
flags[Dirty::DepthMask] = false;
glDepthMask(regs.depth_write_enabled ? GL_TRUE : GL_FALSE);
@@ -1313,14 +1285,13 @@ void RasterizerOpenGL::SyncDepthTestState() {
}
void RasterizerOpenGL::SyncStencilTestState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::StencilTest]) {
return;
}
flags[Dirty::StencilTest] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
oglEnable(GL_STENCIL_TEST, regs.stencil_enable);
glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_func_func),
@@ -1345,25 +1316,24 @@ void RasterizerOpenGL::SyncStencilTestState() {
}
void RasterizerOpenGL::SyncRasterizeEnable() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::RasterizeEnable]) {
return;
}
flags[Dirty::RasterizeEnable] = false;
- oglEnable(GL_RASTERIZER_DISCARD, gpu.regs.rasterize_enable == 0);
+ oglEnable(GL_RASTERIZER_DISCARD, maxwell3d.regs.rasterize_enable == 0);
}
void RasterizerOpenGL::SyncPolygonModes() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::PolygonModes]) {
return;
}
flags[Dirty::PolygonModes] = false;
- if (gpu.regs.fill_rectangle) {
+ const auto& regs = maxwell3d.regs;
+ if (regs.fill_rectangle) {
if (!GLAD_GL_NV_fill_rectangle) {
LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used and not supported");
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
@@ -1376,27 +1346,26 @@ void RasterizerOpenGL::SyncPolygonModes() {
return;
}
- if (gpu.regs.polygon_mode_front == gpu.regs.polygon_mode_back) {
+ if (regs.polygon_mode_front == regs.polygon_mode_back) {
flags[Dirty::PolygonModeFront] = false;
flags[Dirty::PolygonModeBack] = false;
- glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
+ glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_front));
return;
}
if (flags[Dirty::PolygonModeFront]) {
flags[Dirty::PolygonModeFront] = false;
- glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
+ glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(regs.polygon_mode_front));
}
if (flags[Dirty::PolygonModeBack]) {
flags[Dirty::PolygonModeBack] = false;
- glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_back));
+ glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_back));
}
}
void RasterizerOpenGL::SyncColorMask() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::ColorMasks]) {
return;
}
@@ -1405,7 +1374,7 @@ void RasterizerOpenGL::SyncColorMask() {
const bool force = flags[Dirty::ColorMaskCommon];
flags[Dirty::ColorMaskCommon] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
if (regs.color_mask_common) {
if (!force && !flags[Dirty::ColorMask0]) {
return;
@@ -1430,33 +1399,30 @@ void RasterizerOpenGL::SyncColorMask() {
}
void RasterizerOpenGL::SyncMultiSampleState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::MultisampleControl]) {
return;
}
flags[Dirty::MultisampleControl] = false;
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
oglEnable(GL_SAMPLE_ALPHA_TO_COVERAGE, regs.multisample_control.alpha_to_coverage);
oglEnable(GL_SAMPLE_ALPHA_TO_ONE, regs.multisample_control.alpha_to_one);
}
void RasterizerOpenGL::SyncFragmentColorClampState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::FragmentClampColor]) {
return;
}
flags[Dirty::FragmentClampColor] = false;
- glClampColor(GL_CLAMP_FRAGMENT_COLOR, gpu.regs.frag_color_clamp ? GL_TRUE : GL_FALSE);
+ glClampColor(GL_CLAMP_FRAGMENT_COLOR, maxwell3d.regs.frag_color_clamp ? GL_TRUE : GL_FALSE);
}
void RasterizerOpenGL::SyncBlendState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
- const auto& regs = gpu.regs;
+ auto& flags = maxwell3d.dirty.flags;
+ const auto& regs = maxwell3d.regs;
if (flags[Dirty::BlendColor]) {
flags[Dirty::BlendColor] = false;
@@ -1513,14 +1479,13 @@ void RasterizerOpenGL::SyncBlendState() {
}
void RasterizerOpenGL::SyncLogicOpState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::LogicOp]) {
return;
}
flags[Dirty::LogicOp] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
if (regs.logic_op.enable) {
glEnable(GL_COLOR_LOGIC_OP);
glLogicOp(MaxwellToGL::LogicOp(regs.logic_op.operation));
@@ -1530,14 +1495,13 @@ void RasterizerOpenGL::SyncLogicOpState() {
}
void RasterizerOpenGL::SyncScissorTest() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::Scissors]) {
return;
}
flags[Dirty::Scissors] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) {
if (!flags[Dirty::Scissor0 + index]) {
continue;
@@ -1556,16 +1520,15 @@ void RasterizerOpenGL::SyncScissorTest() {
}
void RasterizerOpenGL::SyncPointState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::PointSize]) {
return;
}
flags[Dirty::PointSize] = false;
- oglEnable(GL_POINT_SPRITE, gpu.regs.point_sprite_enable);
+ oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable);
- if (gpu.regs.vp_point_size.enable) {
+ if (maxwell3d.regs.vp_point_size.enable) {
// By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled.
glEnable(GL_PROGRAM_POINT_SIZE);
return;
@@ -1573,32 +1536,30 @@ void RasterizerOpenGL::SyncPointState() {
// Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
// in OpenGL).
- glPointSize(std::max(1.0f, gpu.regs.point_size));
+ glPointSize(std::max(1.0f, maxwell3d.regs.point_size));
glDisable(GL_PROGRAM_POINT_SIZE);
}
void RasterizerOpenGL::SyncLineState() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::LineWidth]) {
return;
}
flags[Dirty::LineWidth] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
oglEnable(GL_LINE_SMOOTH, regs.line_smooth_enable);
glLineWidth(regs.line_smooth_enable ? regs.line_width_smooth : regs.line_width_aliased);
}
void RasterizerOpenGL::SyncPolygonOffset() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::PolygonOffset]) {
return;
}
flags[Dirty::PolygonOffset] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
oglEnable(GL_POLYGON_OFFSET_FILL, regs.polygon_offset_fill_enable);
oglEnable(GL_POLYGON_OFFSET_LINE, regs.polygon_offset_line_enable);
oglEnable(GL_POLYGON_OFFSET_POINT, regs.polygon_offset_point_enable);
@@ -1612,14 +1573,13 @@ void RasterizerOpenGL::SyncPolygonOffset() {
}
void RasterizerOpenGL::SyncAlphaTest() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::AlphaTest]) {
return;
}
flags[Dirty::AlphaTest] = false;
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
if (regs.alpha_test_enabled && regs.rt_control.count > 1) {
LOG_WARNING(Render_OpenGL, "Alpha testing with more than one render target is not tested");
}
@@ -1633,20 +1593,19 @@ void RasterizerOpenGL::SyncAlphaTest() {
}
void RasterizerOpenGL::SyncFramebufferSRGB() {
- auto& gpu = system.GPU().Maxwell3D();
- auto& flags = gpu.dirty.flags;
+ auto& flags = maxwell3d.dirty.flags;
if (!flags[Dirty::FramebufferSRGB]) {
return;
}
flags[Dirty::FramebufferSRGB] = false;
- oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
+ oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d.regs.framebuffer_srgb);
}
void RasterizerOpenGL::SyncTransformFeedback() {
// TODO(Rodrigo): Inject SKIP_COMPONENTS*_NV when required. An unimplemented message will signal
// when this is required.
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
static constexpr std::size_t STRIDE = 3;
std::array<GLint, 128 * STRIDE * Maxwell::NumTransformFeedbackBuffers> attribs;
@@ -1698,7 +1657,7 @@ void RasterizerOpenGL::SyncTransformFeedback() {
}
void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) {
return;
}
@@ -1741,7 +1700,7 @@ void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
}
void RasterizerOpenGL::EndTransformFeedback() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) {
return;
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index ccc6f50f6..f451404b2 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -36,8 +36,8 @@
#include "video_core/shader/async_shaders.h"
#include "video_core/textures/texture.h"
-namespace Core {
-class System;
+namespace Core::Memory {
+class Memory;
}
namespace Core::Frontend {
@@ -55,9 +55,10 @@ struct DrawParameters;
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public:
- explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
- const Device& device, ScreenInfo& info,
- ProgramManager& program_manager, StateTracker& state_tracker);
+ explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
+ Core::Memory::Memory& cpu_memory, const Device& device,
+ ScreenInfo& screen_info, ProgramManager& program_manager,
+ StateTracker& state_tracker);
~RasterizerOpenGL() override;
void Draw(bool is_indexed, bool is_instanced) override;
@@ -83,9 +84,8 @@ public:
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
- void LoadDiskResources(const std::atomic_bool& stop_loading,
+ void LoadDiskResources(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
- void SetupDirtyFlags() override;
/// Returns true when there are commands queued to the OpenGL server.
bool AnyCommandQueued() const {
@@ -237,7 +237,15 @@ private:
void SetupShaders(GLenum primitive_mode);
+ Tegra::GPU& gpu;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+ Tegra::MemoryManager& gpu_memory;
+
const Device& device;
+ ScreenInfo& screen_info;
+ ProgramManager& program_manager;
+ StateTracker& state_tracker;
TextureCacheOpenGL texture_cache;
ShaderCacheOpenGL shader_cache;
@@ -247,10 +255,6 @@ private:
OGLBufferCache buffer_cache;
FenceManagerOpenGL fence_manager;
- Core::System& system;
- ScreenInfo& screen_info;
- ProgramManager& program_manager;
- StateTracker& state_tracker;
VideoCommon::Shader::AsyncShaders async_shaders;
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index a787e27d2..0ebcec427 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <string_view>
#include <utility>
#include <glad/glad.h>
#include "common/common_types.h"
@@ -82,11 +83,13 @@ void OGLSampler::Release() {
handle = 0;
}
-void OGLShader::Create(const char* source, GLenum type) {
- if (handle != 0)
+void OGLShader::Create(std::string_view source, GLenum type) {
+ if (handle != 0) {
return;
- if (source == nullptr)
+ }
+ if (source.empty()) {
return;
+ }
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
handle = GLShader::LoadShader(source, type);
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index b05cb641c..f48398669 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -4,6 +4,7 @@
#pragma once
+#include <string_view>
#include <utility>
#include <glad/glad.h>
#include "common/common_types.h"
@@ -127,7 +128,7 @@ public:
return *this;
}
- void Create(const char* source, GLenum type);
+ void Create(std::string_view source, GLenum type);
void Release();
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index eb49a36bf..bd56bed0c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -22,6 +22,7 @@
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_arb_decompiler.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
@@ -238,12 +239,11 @@ std::unique_ptr<Shader> Shader::CreateStageFromMemory(
ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) {
const auto shader_type = GetShaderType(program_type);
- auto& gpu = params.system.GPU();
+ auto& gpu = params.gpu;
gpu.ShaderNotify().MarkSharderBuilding();
auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D());
- if (!async_shaders.IsShaderAsync(params.system.GPU()) ||
- !params.device.UseAsynchronousShaders()) {
+ if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) {
const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
// TODO(Rodrigo): Handle VertexA shaders
// std::optional<ShaderIR> ir_b;
@@ -286,11 +286,10 @@ std::unique_ptr<Shader> Shader::CreateStageFromMemory(
std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
ProgramCode code) {
- auto& gpu = params.system.GPU();
+ auto& gpu = params.gpu;
gpu.ShaderNotify().MarkSharderBuilding();
- auto& engine = gpu.KeplerCompute();
- auto registry = std::make_shared<Registry>(ShaderType::Compute, engine);
+ auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine);
const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
const u64 uid = params.unique_identifier;
auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
@@ -319,15 +318,20 @@ std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
}
-ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
- Core::Frontend::EmuWindow& emu_window, const Device& device)
- : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system},
- emu_window{emu_window}, device{device}, disk_cache{system} {}
+ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer,
+ Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, const Device& device_)
+ : VideoCommon::ShaderCache<Shader>{rasterizer}, emu_window{emu_window_}, gpu{gpu_},
+ gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_},
+ kepler_compute{kepler_compute_}, device{device_} {}
ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
-void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
+void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
+ disk_cache.BindTitleID(title_id);
const std::optional transferable = disk_cache.LoadTransferable();
if (!transferable) {
return;
@@ -480,21 +484,19 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
VideoCommon::Shader::AsyncShaders& async_shaders) {
- if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
+ if (!maxwell3d.dirty.flags[Dirty::Shaders]) {
auto* last_shader = last_shaders[static_cast<std::size_t>(program)];
if (last_shader->IsBuilt()) {
return last_shader;
}
}
- auto& memory_manager{system.GPU().MemoryManager()};
- const GPUVAddr address{GetShaderAddress(system, program)};
+ const GPUVAddr address{GetShaderAddress(maxwell3d, program)};
if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) {
auto completed_work = async_shaders.GetCompletedWork();
for (auto& work : completed_work) {
Shader* shader = TryGet(work.cpu_address);
- auto& gpu = system.GPU();
gpu.ShaderNotify().MarkShaderComplete();
if (shader == nullptr) {
continue;
@@ -506,14 +508,13 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
shader->AsyncGLASMBuilt(std::move(work.program.glasm));
}
+ auto& registry = shader->GetRegistry();
+
ShaderDiskCacheEntry entry;
entry.type = work.shader_type;
entry.code = std::move(work.code);
entry.code_b = std::move(work.code_b);
entry.unique_identifier = work.uid;
-
- auto& registry = shader->GetRegistry();
-
entry.bound_buffer = registry.GetBoundBuffer();
entry.graphics_info = registry.GetGraphicsInfo();
entry.keys = registry.GetKeys();
@@ -524,28 +525,28 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
}
// Look up shader in the cache based on address
- const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
+ const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)};
if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
return last_shaders[static_cast<std::size_t>(program)] = shader;
}
- const auto host_ptr{memory_manager.GetPointer(address)};
+ const u8* const host_ptr{gpu_memory.GetPointer(address)};
// No shader found - create a new one
- ProgramCode code{GetShaderCode(memory_manager, address, host_ptr, false)};
+ ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)};
ProgramCode code_b;
if (program == Maxwell::ShaderProgram::VertexA) {
- const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
- const u8* host_ptr_b = memory_manager.GetPointer(address_b);
- code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false);
+ const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)};
+ const u8* host_ptr_b = gpu_memory.GetPointer(address_b);
+ code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false);
}
const std::size_t code_size = code.size() * sizeof(u64);
const u64 unique_identifier = GetUniqueIdentifier(
GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
- const ShaderParameters params{system, disk_cache, device,
- *cpu_addr, host_ptr, unique_identifier};
+ const ShaderParameters params{gpu, maxwell3d, disk_cache, device,
+ *cpu_addr, host_ptr, unique_identifier};
std::unique_ptr<Shader> shader;
const auto found = runtime_cache.find(unique_identifier);
@@ -567,21 +568,20 @@ Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
}
Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
- auto& memory_manager{system.GPU().MemoryManager()};
- const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
+ const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)};
if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
return kernel;
}
- const auto host_ptr{memory_manager.GetPointer(code_addr)};
// No kernel found, create a new one
- ProgramCode code{GetShaderCode(memory_manager, code_addr, host_ptr, true)};
+ const u8* host_ptr{gpu_memory.GetPointer(code_addr)};
+ ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)};
const std::size_t code_size{code.size() * sizeof(u64)};
const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
- const ShaderParameters params{system, disk_cache, device,
- *cpu_addr, host_ptr, unique_identifier};
+ const ShaderParameters params{gpu, kepler_compute, disk_cache, device,
+ *cpu_addr, host_ptr, unique_identifier};
std::unique_ptr<Shader> kernel;
const auto found = runtime_cache.find(unique_identifier);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 7528ac686..1708af06a 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -25,8 +25,8 @@
#include "video_core/shader/shader_ir.h"
#include "video_core/shader_cache.h"
-namespace Core {
-class System;
+namespace Tegra {
+class MemoryManager;
}
namespace Core::Frontend {
@@ -57,11 +57,12 @@ struct PrecompiledShader {
};
struct ShaderParameters {
- Core::System& system;
+ Tegra::GPU& gpu;
+ Tegra::Engines::ConstBufferEngineInterface& engine;
ShaderDiskCacheOpenGL& disk_cache;
const Device& device;
VAddr cpu_addr;
- u8* host_ptr;
+ const u8* host_ptr;
u64 unique_identifier;
};
@@ -118,12 +119,14 @@ private:
class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
public:
- explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
- Core::Frontend::EmuWindow& emu_window, const Device& device);
+ explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::Frontend::EmuWindow& emu_window,
+ Tegra::GPU& gpu, Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::Engines::KeplerCompute& kepler_compute,
+ Tegra::MemoryManager& gpu_memory, const Device& device);
~ShaderCacheOpenGL() override;
/// Loads disk cache for the current game
- void LoadDiskCache(const std::atomic_bool& stop_loading,
+ void LoadDiskCache(u64 title_id, const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback);
/// Gets the current specified shader stage program
@@ -138,9 +141,13 @@ private:
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
const std::unordered_set<GLenum>& supported_formats);
- Core::System& system;
Core::Frontend::EmuWindow& emu_window;
+ Tegra::GPU& gpu;
+ Tegra::MemoryManager& gpu_memory;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
const Device& device;
+
ShaderDiskCacheOpenGL disk_cache;
std::unordered_map<u64, PrecompiledShader> runtime_cache;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 3f75fcd2b..bbb8fb095 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -813,7 +813,7 @@ private:
const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
const auto it = transform_feedback.find(location);
if (it == transform_feedback.end()) {
- return {};
+ return std::nullopt;
}
return it->second.components;
}
@@ -1295,21 +1295,21 @@ private:
switch (element) {
case 0:
UNIMPLEMENTED();
- return {};
+ return std::nullopt;
case 1:
if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
- return {};
+ return std::nullopt;
}
return {{"gl_Layer", Type::Int}};
case 2:
if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
- return {};
+ return std::nullopt;
}
return {{"gl_ViewportIndex", Type::Int}};
case 3:
return {{"gl_PointSize", Type::Float}};
}
- return {};
+ return std::nullopt;
case Attribute::Index::FrontColor:
return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}};
case Attribute::Index::FrontSecondaryColor:
@@ -1332,7 +1332,7 @@ private:
Type::Float}};
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
- return {};
+ return std::nullopt;
}
}
@@ -1443,8 +1443,10 @@ private:
return expr + ", vec2(0.0), vec2(0.0))";
case TextureType::TextureCube:
return expr + ", vec3(0.0), vec3(0.0))";
+ default:
+ UNREACHABLE();
+ break;
}
- UNREACHABLE();
}
for (const auto& variant : extras) {
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 52fbab3c1..166ee34e1 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -206,28 +206,32 @@ bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const {
flat_bindless_samplers.size();
}
-ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
+ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default;
ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
+void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) {
+ title_id = title_id_;
+}
+
std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
// Skip games without title id
- const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
+ const bool has_title_id = title_id != 0;
if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) {
- return {};
+ return std::nullopt;
}
Common::FS::IOFile file(GetTransferablePath(), "rb");
if (!file.IsOpen()) {
LOG_INFO(Render_OpenGL, "No transferable shader cache found");
is_usable = true;
- return {};
+ return std::nullopt;
}
u32 version{};
if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
- return {};
+ return std::nullopt;
}
if (version < NativeVersion) {
@@ -235,12 +239,12 @@ std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTran
file.Close();
InvalidateTransferable();
is_usable = true;
- return {};
+ return std::nullopt;
}
if (version > NativeVersion) {
LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
"of the emulator, skipping");
- return {};
+ return std::nullopt;
}
// Version is valid, load the shaders
@@ -249,7 +253,7 @@ std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTran
ShaderDiskCacheEntry& entry = entries.emplace_back();
if (!entry.Load(file)) {
LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
- return {};
+ return std::nullopt;
}
}
@@ -290,12 +294,12 @@ std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::Lo
ShaderCacheVersionHash file_hash{};
if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
precompiled_cache_virtual_file_offset = 0;
- return {};
+ return std::nullopt;
}
if (GetShaderCacheVersionHash() != file_hash) {
LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
precompiled_cache_virtual_file_offset = 0;
- return {};
+ return std::nullopt;
}
std::vector<ShaderDiskCachePrecompiled> entries;
@@ -305,15 +309,16 @@ std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::Lo
if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
!LoadObjectFromPrecompiled(entry.binary_format) ||
!LoadObjectFromPrecompiled(binary_size)) {
- return {};
+ return std::nullopt;
}
entry.binary.resize(binary_size);
if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
- return {};
+ return std::nullopt;
}
}
- return entries;
+
+ return std::move(entries);
}
void ShaderDiskCacheOpenGL::InvalidateTransferable() {
@@ -473,7 +478,7 @@ std::string ShaderDiskCacheOpenGL::GetBaseDir() const {
}
std::string ShaderDiskCacheOpenGL::GetTitleID() const {
- return fmt::format("{:016X}", system.CurrentProcess()->GetTitleID());
+ return fmt::format("{:016X}", title_id);
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index db2bb73bc..aef841c1d 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -21,10 +21,6 @@
#include "video_core/engines/shader_type.h"
#include "video_core/shader/registry.h"
-namespace Core {
-class System;
-}
-
namespace Common::FS {
class IOFile;
}
@@ -70,9 +66,12 @@ struct ShaderDiskCachePrecompiled {
class ShaderDiskCacheOpenGL {
public:
- explicit ShaderDiskCacheOpenGL(Core::System& system);
+ explicit ShaderDiskCacheOpenGL();
~ShaderDiskCacheOpenGL();
+ /// Binds a title ID for all future operations.
+ void BindTitleID(u64 title_id);
+
/// Loads transferable cache. If file has a old version or on failure, it deletes the file.
std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
@@ -157,8 +156,6 @@ private:
return LoadArrayFromPrecompiled(&object, 1);
}
- Core::System& system;
-
// Stores whole precompiled cache which will be read from or saved to the precompiled chache
// file
FileSys::VectorVfsFile precompiled_cache_virtual_file;
@@ -168,8 +165,11 @@ private:
// Stored transferable shaders
std::unordered_set<u64> stored_transferable;
+ /// Title ID to operate on
+ u64 title_id = 0;
+
// The cache has been loaded at boot
- bool is_usable{};
+ bool is_usable = false;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 9e74eda0d..4bf0d6090 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <string_view>
#include <vector>
#include <glad/glad.h>
#include "common/assert.h"
@@ -11,7 +12,8 @@
namespace OpenGL::GLShader {
namespace {
-const char* GetStageDebugName(GLenum type) {
+
+std::string_view StageDebugName(GLenum type) {
switch (type) {
case GL_VERTEX_SHADER:
return "vertex";
@@ -25,12 +27,17 @@ const char* GetStageDebugName(GLenum type) {
UNIMPLEMENTED();
return "unknown";
}
+
} // Anonymous namespace
-GLuint LoadShader(const char* source, GLenum type) {
- const char* debug_type = GetStageDebugName(type);
+GLuint LoadShader(std::string_view source, GLenum type) {
+ const std::string_view debug_type = StageDebugName(type);
const GLuint shader_id = glCreateShader(type);
- glShaderSource(shader_id, 1, &source, nullptr);
+
+ const GLchar* source_string = source.data();
+ const GLint source_length = static_cast<GLint>(source.size());
+
+ glShaderSource(shader_id, 1, &source_string, &source_length);
LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
glCompileShader(shader_id);
diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h
index 03b7548c2..1b770532e 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.h
+++ b/src/video_core/renderer_opengl/gl_shader_util.h
@@ -38,7 +38,7 @@ void LogShaderSource(T... shaders) {
* @param source String of the GLSL shader program
* @param type Type of the shader (GL_VERTEX_SHADER, GL_GEOMETRY_SHADER or GL_FRAGMENT_SHADER)
*/
-GLuint LoadShader(const char* source, GLenum type);
+GLuint LoadShader(std::string_view source, GLenum type);
/**
* Utility function to create and compile an OpenGL GLSL shader program (vertex + fragment shader)
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index d24fad3de..6bcf831f2 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -214,10 +214,8 @@ void SetupDirtyMisc(Tables& tables) {
} // Anonymous namespace
-StateTracker::StateTracker(Core::System& system) : system{system} {}
-
-void StateTracker::Initialize() {
- auto& dirty = system.GPU().Maxwell3D().dirty;
+StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags} {
+ auto& dirty = gpu.Maxwell3D().dirty;
auto& tables = dirty.tables;
SetupDirtyRenderTargets(tables);
SetupDirtyColorMasks(tables);
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 0f823288e..9d127548f 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -13,8 +13,8 @@
#include "video_core/dirty_flags.h"
#include "video_core/engines/maxwell_3d.h"
-namespace Core {
-class System;
+namespace Tegra {
+class GPU;
}
namespace OpenGL {
@@ -90,9 +90,7 @@ static_assert(Last <= std::numeric_limits<u8>::max());
class StateTracker {
public:
- explicit StateTracker(Core::System& system);
-
- void Initialize();
+ explicit StateTracker(Tegra::GPU& gpu);
void BindIndexBuffer(GLuint new_index_buffer) {
if (index_buffer == new_index_buffer) {
@@ -103,7 +101,6 @@ public:
}
void NotifyScreenDrawVertexArray() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::VertexFormats] = true;
flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
@@ -117,98 +114,81 @@ public:
}
void NotifyPolygonModes() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::PolygonModes] = true;
flags[OpenGL::Dirty::PolygonModeFront] = true;
flags[OpenGL::Dirty::PolygonModeBack] = true;
}
void NotifyViewport0() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::Viewports] = true;
flags[OpenGL::Dirty::Viewport0] = true;
}
void NotifyScissor0() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::Scissors] = true;
flags[OpenGL::Dirty::Scissor0] = true;
}
void NotifyColorMask0() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::ColorMasks] = true;
flags[OpenGL::Dirty::ColorMask0] = true;
}
void NotifyBlend0() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::BlendStates] = true;
flags[OpenGL::Dirty::BlendState0] = true;
}
void NotifyFramebuffer() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[VideoCommon::Dirty::RenderTargets] = true;
}
void NotifyFrontFace() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::FrontFace] = true;
}
void NotifyCullTest() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::CullTest] = true;
}
void NotifyDepthMask() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::DepthMask] = true;
}
void NotifyDepthTest() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::DepthTest] = true;
}
void NotifyStencilTest() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::StencilTest] = true;
}
void NotifyPolygonOffset() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::PolygonOffset] = true;
}
void NotifyRasterizeEnable() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::RasterizeEnable] = true;
}
void NotifyFramebufferSRGB() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::FramebufferSRGB] = true;
}
void NotifyLogicOp() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::LogicOp] = true;
}
void NotifyClipControl() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::ClipControl] = true;
}
void NotifyAlphaTest() {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
flags[OpenGL::Dirty::AlphaTest] = true;
}
private:
- Core::System& system;
+ Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
GLuint index_buffer = 0;
};
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 0a7bc9e2b..a863ef218 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -403,7 +403,7 @@ void CachedSurface::DecorateSurfaceName() {
LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName());
}
-void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) {
+void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix) {
LabelGLObject(GL_TEXTURE, main_view.handle, gpu_addr, prefix);
}
@@ -532,10 +532,12 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
return texture_view;
}
-TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
- VideoCore::RasterizerInterface& rasterizer,
- const Device& device, StateTracker& state_tracker)
- : TextureCacheBase{system, rasterizer, device.HasASTC()}, state_tracker{state_tracker} {
+TextureCacheOpenGL::TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::MemoryManager& gpu_memory, const Device& device,
+ StateTracker& state_tracker_)
+ : TextureCacheBase{rasterizer, maxwell3d, gpu_memory, device.HasASTC()}, state_tracker{
+ state_tracker_} {
src_framebuffer.Create();
dst_framebuffer.Create();
}
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index bfc4ddf5d..7787134fc 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -90,7 +90,7 @@ public:
Tegra::Texture::SwizzleSource z_source,
Tegra::Texture::SwizzleSource w_source);
- void DecorateViewName(GPUVAddr gpu_addr, std::string prefix);
+ void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix);
void MarkAsModified(u64 tick) {
surface.MarkAsModified(true, tick);
@@ -129,8 +129,10 @@ private:
class TextureCacheOpenGL final : public TextureCacheBase {
public:
- explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const Device& device, StateTracker& state_tracker);
+ explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::MemoryManager& gpu_memory, const Device& device,
+ StateTracker& state_tracker);
~TextureCacheOpenGL();
protected:
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index fe9bd4b5a..a8be2aa37 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -47,6 +47,8 @@ inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
return GL_UNSIGNED_INT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_UNSIGNED_INT_2_10_10_10_REV;
+ default:
+ break;
}
break;
case Maxwell::VertexAttribute::Type::SignedNorm:
@@ -70,6 +72,8 @@ inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
return GL_INT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return GL_INT_2_10_10_10_REV;
+ default:
+ break;
}
break;
case Maxwell::VertexAttribute::Type::Float:
@@ -84,6 +88,8 @@ inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
case Maxwell::VertexAttribute::Size::Size_32_32_32:
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return GL_FLOAT;
+ default:
+ break;
}
break;
}
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 14bbc3a1c..2ccca1993 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -21,6 +21,8 @@
#include "core/perf_stats.h"
#include "core/settings.h"
#include "core/telemetry_session.h"
+#include "video_core/host_shaders/opengl_present_frag.h"
+#include "video_core/host_shaders/opengl_present_vert.h"
#include "video_core/morton.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
@@ -30,60 +32,6 @@ namespace OpenGL {
namespace {
-constexpr std::size_t SWAP_CHAIN_SIZE = 3;
-
-struct Frame {
- u32 width{}; /// Width of the frame (to detect resize)
- u32 height{}; /// Height of the frame
- bool color_reloaded{}; /// Texture attachment was recreated (ie: resized)
- OpenGL::OGLRenderbuffer color{}; /// Buffer shared between the render/present FBO
- OpenGL::OGLFramebuffer render{}; /// FBO created on the render thread
- OpenGL::OGLFramebuffer present{}; /// FBO created on the present thread
- GLsync render_fence{}; /// Fence created on the render thread
- GLsync present_fence{}; /// Fence created on the presentation thread
- bool is_srgb{}; /// Framebuffer is sRGB or RGB
-};
-
-constexpr char VERTEX_SHADER[] = R"(
-#version 430 core
-
-out gl_PerVertex {
- vec4 gl_Position;
-};
-
-layout (location = 0) in vec2 vert_position;
-layout (location = 1) in vec2 vert_tex_coord;
-layout (location = 0) out vec2 frag_tex_coord;
-
-// This is a truncated 3x3 matrix for 2D transformations:
-// The upper-left 2x2 submatrix performs scaling/rotation/mirroring.
-// The third column performs translation.
-// The third row could be used for projection, which we don't need in 2D. It hence is assumed to
-// implicitly be [0, 0, 1]
-layout (location = 0) uniform mat3x2 modelview_matrix;
-
-void main() {
- // Multiply input position by the rotscale part of the matrix and then manually translate by
- // the last column. This is equivalent to using a full 3x3 matrix and expanding the vector
- // to `vec3(vert_position.xy, 1.0)`
- gl_Position = vec4(mat2(modelview_matrix) * vert_position + modelview_matrix[2], 0.0, 1.0);
- frag_tex_coord = vert_tex_coord;
-}
-)";
-
-constexpr char FRAGMENT_SHADER[] = R"(
-#version 430 core
-
-layout (location = 0) in vec2 frag_tex_coord;
-layout (location = 0) out vec4 color;
-
-layout (binding = 0) uniform sampler2D color_texture;
-
-void main() {
- color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f);
-}
-)";
-
constexpr GLint PositionLocation = 0;
constexpr GLint TexCoordLocation = 1;
constexpr GLint ModelViewMatrixLocation = 0;
@@ -96,24 +44,6 @@ struct ScreenRectVertex {
std::array<GLfloat, 2> tex_coord;
};
-/// Returns true if any debug tool is attached
-bool HasDebugTool() {
- const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
- if (nsight) {
- return true;
- }
-
- GLint num_extensions;
- glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions);
- for (GLuint index = 0; index < static_cast<GLuint>(num_extensions); ++index) {
- const auto name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, index));
- if (!std::strcmp(name, "GL_EXT_debug_tool")) {
- return true;
- }
- }
- return false;
-}
-
/**
* Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left
* corner and (width, height) on the lower-bottom.
@@ -197,132 +127,15 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
} // Anonymous namespace
-/**
- * For smooth Vsync rendering, we want to always present the latest frame that the core generates,
- * but also make sure that rendering happens at the pace that the frontend dictates. This is a
- * helper class that the renderer uses to sync frames between the render thread and the presentation
- * thread
- */
-class FrameMailbox {
-public:
- std::mutex swap_chain_lock;
- std::condition_variable present_cv;
- std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
- std::queue<Frame*> free_queue;
- std::deque<Frame*> present_queue;
- Frame* previous_frame{};
-
- FrameMailbox() {
- for (auto& frame : swap_chain) {
- free_queue.push(&frame);
- }
- }
-
- ~FrameMailbox() {
- // lock the mutex and clear out the present and free_queues and notify any people who are
- // blocked to prevent deadlock on shutdown
- std::scoped_lock lock{swap_chain_lock};
- std::queue<Frame*>().swap(free_queue);
- present_queue.clear();
- present_cv.notify_all();
- }
-
- void ReloadPresentFrame(Frame* frame, u32 height, u32 width) {
- frame->present.Release();
- frame->present.Create();
- GLint previous_draw_fbo{};
- glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
- glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
- glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
- frame->color.handle);
- if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
- LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
- }
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
- frame->color_reloaded = false;
- }
-
- void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
- // Recreate the color texture attachment
- frame->color.Release();
- frame->color.Create();
- const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8;
- glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height);
-
- // Recreate the FBO for the render target
- frame->render.Release();
- frame->render.Create();
- glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle);
- glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
- frame->color.handle);
- if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
- LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
- }
-
- frame->width = width;
- frame->height = height;
- frame->color_reloaded = true;
- }
-
- Frame* GetRenderFrame() {
- std::unique_lock lock{swap_chain_lock};
-
- // If theres no free frames, we will reuse the oldest render frame
- if (free_queue.empty()) {
- auto frame = present_queue.back();
- present_queue.pop_back();
- return frame;
- }
-
- Frame* frame = free_queue.front();
- free_queue.pop();
- return frame;
- }
-
- void ReleaseRenderFrame(Frame* frame) {
- std::unique_lock lock{swap_chain_lock};
- present_queue.push_front(frame);
- present_cv.notify_one();
- }
-
- Frame* TryGetPresentFrame(int timeout_ms) {
- std::unique_lock lock{swap_chain_lock};
- // wait for new entries in the present_queue
- present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
- [&] { return !present_queue.empty(); });
- if (present_queue.empty()) {
- // timed out waiting for a frame to draw so return the previous frame
- return previous_frame;
- }
-
- // free the previous frame and add it back to the free queue
- if (previous_frame) {
- free_queue.push(previous_frame);
- }
-
- // the newest entries are pushed to the front of the queue
- Frame* frame = present_queue.front();
- present_queue.pop_front();
- // remove all old entries from the present queue and move them back to the free_queue
- for (auto f : present_queue) {
- free_queue.push(f);
- }
- present_queue.clear();
- previous_frame = frame;
- return frame;
- }
-};
-
-RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
- Core::Frontend::GraphicsContext& context)
- : RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context},
- program_manager{device}, has_debug_tool{HasDebugTool()} {}
+RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
+ Core::Frontend::EmuWindow& emu_window_,
+ Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context)
+ : RendererBase{emu_window_, std::move(context)}, telemetry_session{telemetry_session_},
+ emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, program_manager{device} {}
RendererOpenGL::~RendererOpenGL() = default;
-MICROPROFILE_DEFINE(OpenGL_RenderFrame, "OpenGL", "Render Frame", MP_RGB(128, 128, 64));
-MICROPROFILE_DEFINE(OpenGL_WaitPresent, "OpenGL", "Wait For Present", MP_RGB(128, 128, 128));
-
void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
if (!framebuffer) {
return;
@@ -331,79 +144,34 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
PrepareRendertarget(framebuffer);
RenderScreenshot();
- Frame* frame;
- {
- MICROPROFILE_SCOPE(OpenGL_WaitPresent);
-
- frame = frame_mailbox->GetRenderFrame();
-
- // Clean up sync objects before drawing
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+ DrawScreen(emu_window.GetFramebufferLayout());
- // INTEL driver workaround. We can't delete the previous render sync object until we are
- // sure that the presentation is done
- if (frame->present_fence) {
- glClientWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED);
- }
-
- // delete the draw fence if the frame wasn't presented
- if (frame->render_fence) {
- glDeleteSync(frame->render_fence);
- frame->render_fence = 0;
- }
-
- // wait for the presentation to be done
- if (frame->present_fence) {
- glWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED);
- glDeleteSync(frame->present_fence);
- frame->present_fence = 0;
- }
- }
+ ++m_current_frame;
- {
- MICROPROFILE_SCOPE(OpenGL_RenderFrame);
- const auto& layout = render_window.GetFramebufferLayout();
-
- // Recreate the frame if the size of the window has changed
- if (layout.width != frame->width || layout.height != frame->height ||
- screen_info.display_srgb != frame->is_srgb) {
- LOG_DEBUG(Render_OpenGL, "Reloading render frame");
- frame->is_srgb = screen_info.display_srgb;
- frame_mailbox->ReloadRenderFrame(frame, layout.width, layout.height);
- }
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, frame->render.handle);
- DrawScreen(layout);
- // Create a fence for the frontend to wait on and swap this frame to OffTex
- frame->render_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
- glFlush();
- frame_mailbox->ReleaseRenderFrame(frame);
- m_current_frame++;
- rasterizer->TickFrame();
- }
+ rasterizer->TickFrame();
render_window.PollEvents();
- if (has_debug_tool) {
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
- Present(0);
- context.SwapBuffers();
- }
+ context->SwapBuffers();
}
void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) {
- if (framebuffer) {
- // If framebuffer is provided, reload it from memory to a texture
- if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) ||
- screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) ||
- screen_info.texture.pixel_format != framebuffer->pixel_format ||
- gl_framebuffer_data.empty()) {
- // Reallocate texture if the framebuffer size has changed.
- // This is expected to not happen very often and hence should not be a
- // performance problem.
- ConfigureFramebufferTexture(screen_info.texture, *framebuffer);
- }
-
- // Load the framebuffer from memory, draw it to the screen, and swap buffers
- LoadFBToScreenInfo(*framebuffer);
+ if (!framebuffer) {
+ return;
+ }
+ // If framebuffer is provided, reload it from memory to a texture
+ if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) ||
+ screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) ||
+ screen_info.texture.pixel_format != framebuffer->pixel_format ||
+ gl_framebuffer_data.empty()) {
+ // Reallocate texture if the framebuffer size has changed.
+ // This is expected to not happen very often and hence should not be a
+ // performance problem.
+ ConfigureFramebufferTexture(screen_info.texture, *framebuffer);
}
+
+ // Load the framebuffer from memory, draw it to the screen, and swap buffers
+ LoadFBToScreenInfo(*framebuffer);
}
void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) {
@@ -423,7 +191,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
- u8* const host_ptr{system.Memory().GetPointer(framebuffer_addr)};
+ u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);
// TODO(Rodrigo): Read this from HLE
@@ -453,17 +221,15 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
}
void RendererOpenGL::InitOpenGLObjects() {
- frame_mailbox = std::make_unique<FrameMailbox>();
-
glClearColor(Settings::values.bg_red.GetValue(), Settings::values.bg_green.GetValue(),
Settings::values.bg_blue.GetValue(), 0.0f);
// Create shader programs
OGLShader vertex_shader;
- vertex_shader.Create(VERTEX_SHADER, GL_VERTEX_SHADER);
+ vertex_shader.Create(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
OGLShader fragment_shader;
- fragment_shader.Create(FRAGMENT_SHADER, GL_FRAGMENT_SHADER);
+ fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
vertex_program.Create(true, false, vertex_shader.handle);
fragment_program.Create(true, false, fragment_shader.handle);
@@ -508,7 +274,6 @@ void RendererOpenGL::AddTelemetryFields() {
LOG_INFO(Render_OpenGL, "GL_VENDOR: {}", gpu_vendor);
LOG_INFO(Render_OpenGL, "GL_RENDERER: {}", gpu_model);
- auto& telemetry_session = system.TelemetrySession();
constexpr auto user_system = Common::Telemetry::FieldType::UserSystem;
telemetry_session.AddField(user_system, "GPU_Vendor", gpu_vendor);
telemetry_session.AddField(user_system, "GPU_Model", gpu_model);
@@ -519,8 +284,8 @@ void RendererOpenGL::CreateRasterizer() {
if (rasterizer) {
return;
}
- rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, device, screen_info,
- program_manager, state_tracker);
+ rasterizer = std::make_unique<RasterizerOpenGL>(emu_window, gpu, cpu_memory, device,
+ screen_info, program_manager, state_tracker);
}
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
@@ -683,51 +448,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
program_manager.RestoreGuestPipeline();
}
-bool RendererOpenGL::TryPresent(int timeout_ms) {
- if (has_debug_tool) {
- LOG_DEBUG(Render_OpenGL,
- "Skipping presentation because we are presenting on the main context");
- return false;
- }
- return Present(timeout_ms);
-}
-
-bool RendererOpenGL::Present(int timeout_ms) {
- const auto& layout = render_window.GetFramebufferLayout();
- auto frame = frame_mailbox->TryGetPresentFrame(timeout_ms);
- if (!frame) {
- LOG_DEBUG(Render_OpenGL, "TryGetPresentFrame returned no frame to present");
- return false;
- }
-
- // Clearing before a full overwrite of a fbo can signal to drivers that they can avoid a
- // readback since we won't be doing any blending
- glClear(GL_COLOR_BUFFER_BIT);
-
- // Recreate the presentation FBO if the color attachment was changed
- if (frame->color_reloaded) {
- LOG_DEBUG(Render_OpenGL, "Reloading present frame");
- frame_mailbox->ReloadPresentFrame(frame, layout.width, layout.height);
- }
- glWaitSync(frame->render_fence, 0, GL_TIMEOUT_IGNORED);
- // INTEL workaround.
- // Normally we could just delete the draw fence here, but due to driver bugs, we can just delete
- // it on the emulation thread without too much penalty
- // glDeleteSync(frame.render_sync);
- // frame.render_sync = 0;
-
- glBindFramebuffer(GL_READ_FRAMEBUFFER, frame->present.handle);
- glBlitFramebuffer(0, 0, frame->width, frame->height, 0, 0, layout.width, layout.height,
- GL_COLOR_BUFFER_BIT, GL_LINEAR);
-
- // Insert fence for the main thread to block on
- frame->present_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
- glFlush();
-
- glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
- return true;
-}
-
void RendererOpenGL::RenderScreenshot() {
if (!renderer_settings.screenshot_requested) {
return;
@@ -742,7 +462,7 @@ void RendererOpenGL::RenderScreenshot() {
screenshot_framebuffer.Create();
glBindFramebuffer(GL_FRAMEBUFFER, screenshot_framebuffer.handle);
- Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
+ const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
GLuint renderbuffer;
glGenRenderbuffers(1, &renderbuffer);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 8b18d32e6..9ef181f95 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -16,16 +16,25 @@
namespace Core {
class System;
-}
+class TelemetrySession;
+} // namespace Core
namespace Core::Frontend {
class EmuWindow;
}
+namespace Core::Memory {
+class Memory;
+}
+
namespace Layout {
struct FramebufferLayout;
}
+namespace Tegra {
+class GPU;
+}
+
namespace OpenGL {
/// Structure used for storing information about the textures for the Switch screen
@@ -46,24 +55,17 @@ struct ScreenInfo {
TextureInfo texture;
};
-struct PresentationTexture {
- u32 width = 0;
- u32 height = 0;
- OGLTexture texture;
-};
-
-class FrameMailbox;
-
class RendererOpenGL final : public VideoCore::RendererBase {
public:
- explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
- Core::Frontend::GraphicsContext& context);
+ explicit RendererOpenGL(Core::TelemetrySession& telemetry_session,
+ Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory,
+ Tegra::GPU& gpu,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context);
~RendererOpenGL() override;
bool Init() override;
void ShutDown() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
- bool TryPresent(int timeout_ms) override;
private:
/// Initializes the OpenGL state and creates persistent objects.
@@ -91,14 +93,13 @@ private:
void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer);
- bool Present(int timeout_ms);
-
+ Core::TelemetrySession& telemetry_session;
Core::Frontend::EmuWindow& emu_window;
- Core::System& system;
- Core::Frontend::GraphicsContext& context;
- const Device device;
+ Core::Memory::Memory& cpu_memory;
+ Tegra::GPU& gpu;
- StateTracker state_tracker{system};
+ const Device device;
+ StateTracker state_tracker{gpu};
// OpenGL object IDs
OGLBuffer vertex_buffer;
@@ -120,13 +121,8 @@ private:
std::vector<u8> gl_framebuffer_data;
/// Used for transforming the framebuffer orientation
- Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
+ Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags{};
Common::Rectangle<int> framebuffer_crop_rect;
-
- /// Frame presentation mailbox
- std::unique_ptr<FrameMailbox> frame_mailbox;
-
- bool has_debug_tool = false;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 81a39a3b8..da5c550ea 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -58,6 +58,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta
logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0);
logic_op.Assign(PackLogicOp(regs.logic_op.operation));
rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
+ topology.Assign(regs.draw.topology);
std::memcpy(&point_size, &regs.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast
@@ -131,7 +132,6 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size
}
void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) {
- const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
u32 packed_front_face = PackFrontFace(regs.front_face);
if (regs.screen_y_control.triangle_rast_flip != 0) {
// Flip front face
@@ -161,7 +161,6 @@ void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) {
depth_test_enable.Assign(regs.depth_test_enable);
front_face.Assign(packed_front_face);
depth_test_func.Assign(PackComparisonOp(regs.depth_test_func));
- topology.Assign(topology_index);
cull_face.Assign(PackCullFace(regs.cull_face));
cull_enable.Assign(regs.cull_test_enabled != 0 ? 1 : 0);
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index cdcbb65f5..2c18eeaae 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -150,9 +150,8 @@ struct FixedPipelineState {
};
union {
u32 raw2;
- BitField<0, 4, u32> topology;
- BitField<4, 2, u32> cull_face;
- BitField<6, 1, u32> cull_enable;
+ BitField<0, 2, u32> cull_face;
+ BitField<2, 1, u32> cull_enable;
};
std::array<VertexBinding, Maxwell::NumVertexArrays> vertex_bindings;
@@ -169,10 +168,6 @@ struct FixedPipelineState {
Maxwell::FrontFace FrontFace() const noexcept {
return UnpackFrontFace(front_face.Value());
}
-
- constexpr Maxwell::PrimitiveTopology Topology() const noexcept {
- return static_cast<Maxwell::PrimitiveTopology>(topology.Value());
- }
};
union {
@@ -190,6 +185,7 @@ struct FixedPipelineState {
BitField<18, 1, u32> logic_op_enable;
BitField<19, 4, u32> logic_op;
BitField<23, 1, u32> rasterize_enable;
+ BitField<24, 4, Maxwell::PrimitiveTopology> topology;
};
u32 point_size;
std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index f8c77f4fa..d22de1d81 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -78,9 +78,10 @@ VkSamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode w
case Tegra::Texture::WrapMode::MirrorOnceBorder:
UNIMPLEMENTED();
return VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
+ return {};
}
- UNIMPLEMENTED_MSG("Unimplemented wrap mode={}", static_cast<u32>(wrap_mode));
- return {};
}
VkCompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func) {
@@ -298,9 +299,10 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
case Maxwell::PrimitiveTopology::Patches:
return VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
+ default:
+ UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
+ return {};
}
- UNIMPLEMENTED_MSG("Unimplemented topology={}", static_cast<u32>(topology));
- return {};
}
VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
@@ -325,6 +327,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R16G16B16A16_UNORM;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
+ default:
+ break;
}
break;
case Maxwell::VertexAttribute::Type::SignedNorm:
@@ -347,6 +351,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R16G16B16A16_SNORM;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return VK_FORMAT_A2B10G10R10_SNORM_PACK32;
+ default:
+ break;
}
break;
case Maxwell::VertexAttribute::Type::UnsignedScaled:
@@ -369,6 +375,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R16G16B16A16_USCALED;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return VK_FORMAT_A2B10G10R10_USCALED_PACK32;
+ default:
+ break;
}
break;
case Maxwell::VertexAttribute::Type::SignedScaled:
@@ -391,6 +399,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R16G16B16A16_SSCALED;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return VK_FORMAT_A2B10G10R10_SSCALED_PACK32;
+ default:
+ break;
}
break;
case Maxwell::VertexAttribute::Type::UnsignedInt:
@@ -421,6 +431,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R32G32B32A32_UINT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return VK_FORMAT_A2B10G10R10_UINT_PACK32;
+ default:
+ break;
}
break;
case Maxwell::VertexAttribute::Type::SignedInt:
@@ -451,6 +463,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R32G32B32A32_SINT;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return VK_FORMAT_A2B10G10R10_SINT_PACK32;
+ default:
+ break;
}
break;
case Maxwell::VertexAttribute::Type::Float:
@@ -471,6 +485,8 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R32G32B32_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return VK_FORMAT_R32G32B32A32_SFLOAT;
+ default:
+ break;
}
break;
}
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index 7ffc90cd0..f2610868e 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -25,9 +25,9 @@
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
@@ -56,7 +56,7 @@ VkBool32 DebugCallback(VkDebugUtilsMessageSeverityFlagBitsEXT severity,
VkDebugUtilsMessageTypeFlagsEXT type,
const VkDebugUtilsMessengerCallbackDataEXT* data,
[[maybe_unused]] void* user_data) {
- const char* message{data->pMessage};
+ const char* const message{data->pMessage};
if (severity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
LOG_CRITICAL(Render_Vulkan, "{}", message);
@@ -86,7 +86,7 @@ Common::DynamicLibrary OpenVulkanLibrary() {
if (!library.Open(filename.c_str())) {
// Android devices may not have libvulkan.so.1, only libvulkan.so.
filename = Common::DynamicLibrary::GetVersionedFilename("vulkan");
- library.Open(filename.c_str());
+ (void)library.Open(filename.c_str());
}
#endif
return library;
@@ -240,8 +240,12 @@ std::string BuildCommaSeparatedExtensions(std::vector<std::string> available_ext
} // Anonymous namespace
-RendererVulkan::RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system)
- : RendererBase(window), system{system} {}
+RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
+ Core::Frontend::EmuWindow& emu_window,
+ Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context)
+ : RendererBase{emu_window, std::move(context)}, telemetry_session{telemetry_session_},
+ cpu_memory{cpu_memory_}, gpu{gpu_} {}
RendererVulkan::~RendererVulkan() {
ShutDown();
@@ -268,11 +272,11 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
scheduler->WaitWorker();
swapchain->AcquireNextImage();
- const auto [fence, render_semaphore] = blit_screen->Draw(*framebuffer, use_accelerated);
+ const VkSemaphore render_semaphore = blit_screen->Draw(*framebuffer, use_accelerated);
- scheduler->Flush(false, render_semaphore);
+ scheduler->Flush(render_semaphore);
- if (swapchain->Present(render_semaphore, fence)) {
+ if (swapchain->Present(render_semaphore)) {
blit_screen->Recreate();
}
@@ -282,11 +286,6 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
render_window.PollEvents();
}
-bool RendererVulkan::TryPresent(int /*timeout_ms*/) {
- // TODO (bunnei): ImplementMe
- return true;
-}
-
bool RendererVulkan::Init() {
library = OpenVulkanLibrary();
std::tie(instance, instance_version) = CreateInstance(
@@ -299,23 +298,21 @@ bool RendererVulkan::Init() {
memory_manager = std::make_unique<VKMemoryManager>(*device);
- resource_manager = std::make_unique<VKResourceManager>(*device);
+ state_tracker = std::make_unique<StateTracker>(gpu);
+
+ scheduler = std::make_unique<VKScheduler>(*device, *state_tracker);
const auto& framebuffer = render_window.GetFramebufferLayout();
- swapchain = std::make_unique<VKSwapchain>(*surface, *device);
+ swapchain = std::make_unique<VKSwapchain>(*surface, *device, *scheduler);
swapchain->Create(framebuffer.width, framebuffer.height, false);
- state_tracker = std::make_unique<StateTracker>(system);
-
- scheduler = std::make_unique<VKScheduler>(*device, *resource_manager, *state_tracker);
-
- rasterizer = std::make_unique<RasterizerVulkan>(system, render_window, screen_info, *device,
- *resource_manager, *memory_manager,
- *state_tracker, *scheduler);
+ rasterizer = std::make_unique<RasterizerVulkan>(render_window, gpu, gpu.MemoryManager(),
+ cpu_memory, screen_info, *device,
+ *memory_manager, *state_tracker, *scheduler);
- blit_screen = std::make_unique<VKBlitScreen>(system, render_window, *rasterizer, *device,
- *resource_manager, *memory_manager, *swapchain,
- *scheduler, screen_info);
+ blit_screen =
+ std::make_unique<VKBlitScreen>(cpu_memory, render_window, *rasterizer, *device,
+ *memory_manager, *swapchain, *scheduler, screen_info);
return true;
}
@@ -333,7 +330,6 @@ void RendererVulkan::ShutDown() {
scheduler.reset();
swapchain.reset();
memory_manager.reset();
- resource_manager.reset();
device.reset();
}
@@ -442,8 +438,7 @@ void RendererVulkan::Report() const {
LOG_INFO(Render_Vulkan, "Device: {}", model_name);
LOG_INFO(Render_Vulkan, "Vulkan: {}", api_version);
- auto& telemetry_session = system.TelemetrySession();
- constexpr auto field = Common::Telemetry::FieldType::UserSystem;
+ static constexpr auto field = Common::Telemetry::FieldType::UserSystem;
telemetry_session.AddField(field, "GPU_Vendor", vendor_name);
telemetry_session.AddField(field, "GPU_Model", model_name);
telemetry_session.AddField(field, "GPU_Vulkan_Driver", driver_name);
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index 9617a93e9..1044ca124 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -14,7 +14,15 @@
#include "video_core/renderer_vulkan/wrapper.h"
namespace Core {
-class System;
+class TelemetrySession;
+}
+
+namespace Core::Memory {
+class Memory;
+}
+
+namespace Tegra {
+class GPU;
}
namespace Vulkan {
@@ -22,9 +30,7 @@ namespace Vulkan {
class StateTracker;
class VKBlitScreen;
class VKDevice;
-class VKFence;
class VKMemoryManager;
-class VKResourceManager;
class VKSwapchain;
class VKScheduler;
class VKImage;
@@ -38,13 +44,15 @@ struct VKScreenInfo {
class RendererVulkan final : public VideoCore::RendererBase {
public:
- explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system);
+ explicit RendererVulkan(Core::TelemetrySession& telemtry_session,
+ Core::Frontend::EmuWindow& emu_window, Core::Memory::Memory& cpu_memory,
+ Tegra::GPU& gpu,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context);
~RendererVulkan() override;
bool Init() override;
void ShutDown() override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
- bool TryPresent(int timeout_ms) override;
static std::vector<std::string> EnumerateDevices();
@@ -57,7 +65,9 @@ private:
void Report() const;
- Core::System& system;
+ Core::TelemetrySession& telemetry_session;
+ Core::Memory::Memory& cpu_memory;
+ Tegra::GPU& gpu;
Common::DynamicLibrary library;
vk::InstanceDispatch dld;
@@ -71,11 +81,10 @@ private:
vk::DebugCallback debug_callback;
std::unique_ptr<VKDevice> device;
- std::unique_ptr<VKSwapchain> swapchain;
std::unique_ptr<VKMemoryManager> memory_manager;
- std::unique_ptr<VKResourceManager> resource_manager;
std::unique_ptr<StateTracker> state_tracker;
std::unique_ptr<VKScheduler> scheduler;
+ std::unique_ptr<VKSwapchain> swapchain;
std::unique_ptr<VKBlitScreen> blit_screen;
};
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index a551e3de8..b5b60309e 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -12,11 +12,9 @@
#include "common/assert.h"
#include "common/common_types.h"
#include "common/math_util.h"
-
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "core/memory.h"
-
#include "video_core/gpu.h"
#include "video_core/morton.h"
#include "video_core/rasterizer_interface.h"
@@ -24,8 +22,8 @@
#include "video_core/renderer_vulkan/vk_blit_screen.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_image.h"
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
@@ -210,17 +208,15 @@ struct VKBlitScreen::BufferData {
// Unaligned image data goes here
};
-VKBlitScreen::VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window,
- VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
- VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
- VKSwapchain& swapchain, VKScheduler& scheduler,
- const VKScreenInfo& screen_info)
- : system{system}, render_window{render_window}, rasterizer{rasterizer}, device{device},
- resource_manager{resource_manager}, memory_manager{memory_manager}, swapchain{swapchain},
- scheduler{scheduler}, image_count{swapchain.GetImageCount()}, screen_info{screen_info} {
- watches.resize(image_count);
- std::generate(watches.begin(), watches.end(),
- []() { return std::make_unique<VKFenceWatch>(); });
+VKBlitScreen::VKBlitScreen(Core::Memory::Memory& cpu_memory_,
+ Core::Frontend::EmuWindow& render_window_,
+ VideoCore::RasterizerInterface& rasterizer_, const VKDevice& device_,
+ VKMemoryManager& memory_manager_, VKSwapchain& swapchain_,
+ VKScheduler& scheduler_, const VKScreenInfo& screen_info_)
+ : cpu_memory{cpu_memory_}, render_window{render_window_}, rasterizer{rasterizer_},
+ device{device_}, memory_manager{memory_manager_}, swapchain{swapchain_},
+ scheduler{scheduler_}, image_count{swapchain.GetImageCount()}, screen_info{screen_info_} {
+ resource_ticks.resize(image_count);
CreateStaticResources();
CreateDynamicResources();
@@ -232,15 +228,16 @@ void VKBlitScreen::Recreate() {
CreateDynamicResources();
}
-std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
- bool use_accelerated) {
+VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated) {
RefreshResources(framebuffer);
// Finish any pending renderpass
scheduler.RequestOutsideRenderPassOperationContext();
const std::size_t image_index = swapchain.GetImageIndex();
- watches[image_index]->Watch(scheduler.GetFence());
+
+ scheduler.Wait(resource_ticks[image_index]);
+ resource_ticks[image_index] = scheduler.CurrentTick();
VKImage* blit_image = use_accelerated ? screen_info.image : raw_images[image_index].get();
@@ -259,7 +256,7 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon
const auto pixel_format =
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
- const auto host_ptr = system.Memory().GetPointer(framebuffer_addr);
+ const auto host_ptr = cpu_memory.GetPointer(framebuffer_addr);
rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer));
// TODO(Rodrigo): Read this from HLE
@@ -343,7 +340,7 @@ std::tuple<VKFence&, VkSemaphore> VKBlitScreen::Draw(const Tegra::FramebufferCon
cmdbuf.EndRenderPass();
});
- return {scheduler.GetFence(), *semaphores[image_index]};
+ return *semaphores[image_index];
}
void VKBlitScreen::CreateStaticResources() {
@@ -711,7 +708,7 @@ void VKBlitScreen::CreateFramebuffers() {
void VKBlitScreen::ReleaseRawImages() {
for (std::size_t i = 0; i < raw_images.size(); ++i) {
- watches[i]->Wait();
+ scheduler.Wait(resource_ticks.at(i));
}
raw_images.clear();
raw_buffer_commits.clear();
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index 243640fab..8f2839214 100644
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -5,16 +5,18 @@
#pragma once
#include <memory>
-#include <tuple>
#include "video_core/renderer_vulkan/vk_memory_manager.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Core {
class System;
}
+namespace Core::Memory {
+class Memory;
+}
+
namespace Core::Frontend {
class EmuWindow;
}
@@ -30,26 +32,26 @@ class RasterizerInterface;
namespace Vulkan {
struct ScreenInfo;
+
class RasterizerVulkan;
class VKDevice;
-class VKFence;
class VKImage;
class VKScheduler;
class VKSwapchain;
class VKBlitScreen final {
public:
- explicit VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window,
+ explicit VKBlitScreen(Core::Memory::Memory& cpu_memory,
+ Core::Frontend::EmuWindow& render_window,
VideoCore::RasterizerInterface& rasterizer, const VKDevice& device,
- VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
- VKSwapchain& swapchain, VKScheduler& scheduler,
- const VKScreenInfo& screen_info);
+ VKMemoryManager& memory_manager, VKSwapchain& swapchain,
+ VKScheduler& scheduler, const VKScreenInfo& screen_info);
~VKBlitScreen();
void Recreate();
- std::tuple<VKFence&, VkSemaphore> Draw(const Tegra::FramebufferConfig& framebuffer,
- bool use_accelerated);
+ [[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer,
+ bool use_accelerated);
private:
struct BufferData;
@@ -81,11 +83,10 @@ private:
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
std::size_t image_index) const;
- Core::System& system;
+ Core::Memory::Memory& cpu_memory;
Core::Frontend::EmuWindow& render_window;
VideoCore::RasterizerInterface& rasterizer;
const VKDevice& device;
- VKResourceManager& resource_manager;
VKMemoryManager& memory_manager;
VKSwapchain& swapchain;
VKScheduler& scheduler;
@@ -106,7 +107,7 @@ private:
vk::Buffer buffer;
VKMemoryCommit buffer_commit;
- std::vector<std::unique_ptr<VKFenceWatch>> watches;
+ std::vector<u64> resource_ticks;
std::vector<vk::Semaphore> semaphores;
std::vector<std::unique_ptr<VKImage>> raw_images;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 1d2f8b557..d9d3da9ea 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -145,14 +145,15 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
});
}
-VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
- const VKDevice& device, VKMemoryManager& memory_manager,
- VKScheduler& scheduler, VKStagingBufferPool& staging_pool)
- : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, system,
- CreateStreamBuffer(device,
- scheduler)},
- device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{
- staging_pool} {}
+VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
+ const VKDevice& device_, VKMemoryManager& memory_manager_,
+ VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_)
+ : VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer, gpu_memory, cpu_memory,
+ CreateStreamBuffer(device_,
+ scheduler_)},
+ device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{
+ staging_pool_} {}
VKBufferCache::~VKBufferCache() = default;
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index 991ee451c..7fb5ceedf 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -13,10 +13,6 @@
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/wrapper.h"
-namespace Core {
-class System;
-}
-
namespace Vulkan {
class VKDevice;
@@ -53,7 +49,8 @@ private:
class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
public:
- explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
+ explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
~VKBufferCache();
diff --git a/src/video_core/renderer_vulkan/vk_command_pool.cpp b/src/video_core/renderer_vulkan/vk_command_pool.cpp
new file mode 100644
index 000000000..6339f4fe0
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_command_pool.cpp
@@ -0,0 +1,46 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstddef>
+
+#include "video_core/renderer_vulkan/vk_command_pool.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/wrapper.h"
+
+namespace Vulkan {
+
+constexpr size_t COMMAND_BUFFER_POOL_SIZE = 0x1000;
+
+struct CommandPool::Pool {
+ vk::CommandPool handle;
+ vk::CommandBuffers cmdbufs;
+};
+
+CommandPool::CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device)
+ : ResourcePool(master_semaphore, COMMAND_BUFFER_POOL_SIZE), device{device} {}
+
+CommandPool::~CommandPool() = default;
+
+void CommandPool::Allocate(size_t begin, size_t end) {
+ // Command buffers are going to be commited, recorded, executed every single usage cycle.
+ // They are also going to be reseted when commited.
+ Pool& pool = pools.emplace_back();
+ pool.handle = device.GetLogical().CreateCommandPool({
+ .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+ .pNext = nullptr,
+ .flags =
+ VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
+ .queueFamilyIndex = device.GetGraphicsFamily(),
+ });
+ pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE);
+}
+
+VkCommandBuffer CommandPool::Commit() {
+ const size_t index = CommitResource();
+ const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
+ const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
+ return pools[pool_index].cmdbufs[sub_index];
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_command_pool.h b/src/video_core/renderer_vulkan/vk_command_pool.h
new file mode 100644
index 000000000..b9cb3fb5d
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_command_pool.h
@@ -0,0 +1,34 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <vector>
+
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
+#include "video_core/renderer_vulkan/wrapper.h"
+
+namespace Vulkan {
+
+class MasterSemaphore;
+class VKDevice;
+
+class CommandPool final : public ResourcePool {
+public:
+ explicit CommandPool(MasterSemaphore& master_semaphore, const VKDevice& device);
+ ~CommandPool() override;
+
+ void Allocate(size_t begin, size_t end) override;
+
+ VkCommandBuffer Commit();
+
+private:
+ struct Pool;
+
+ const VKDevice& device;
+ std::vector<Pool> pools;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
index 182461ed9..9637c6059 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp
@@ -112,7 +112,8 @@ constexpr u8 quad_array[] = {
0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00,
- 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
+ 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
+};
VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
return {
@@ -218,7 +219,8 @@ constexpr u8 uint8_pass[] = {
0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00,
- 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
+ 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
+};
// Quad indexed SPIR-V module. Generated from the "shaders/" directory.
constexpr u8 QUAD_INDEXED_SPV[] = {
@@ -341,7 +343,8 @@ constexpr u8 QUAD_INDEXED_SPV[] = {
0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00,
0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00,
- 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
+ 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
+};
std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {
return {{
@@ -448,12 +451,12 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
VKComputePass::~VKComputePass() = default;
-VkDescriptorSet VKComputePass::CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue,
- VKFence& fence) {
+VkDescriptorSet VKComputePass::CommitDescriptorSet(
+ VKUpdateDescriptorQueue& update_descriptor_queue) {
if (!descriptor_template) {
return nullptr;
}
- const auto set = descriptor_allocator->Commit(fence);
+ const VkDescriptorSet set = descriptor_allocator->Commit();
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
@@ -477,7 +480,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
- const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
+ const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
scheduler.RequestOutsideRenderPassOperationContext();
@@ -520,13 +523,13 @@ Uint8Pass::~Uint8Pass() = default;
std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
u64 src_offset) {
- const auto staging_size = static_cast<u32>(num_vertices * sizeof(u16));
+ const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
- const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
+ const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set,
@@ -589,7 +592,7 @@ std::pair<VkBuffer, u64> QuadIndexedPass::Assemble(
update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
- const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
+ const VkDescriptorSet set = CommitDescriptorSet(update_descriptor_queue);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set,
diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h
index 230b526bc..acc94f27e 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pass.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pass.h
@@ -15,7 +15,6 @@
namespace Vulkan {
class VKDevice;
-class VKFence;
class VKScheduler;
class VKStagingBufferPool;
class VKUpdateDescriptorQueue;
@@ -30,8 +29,7 @@ public:
~VKComputePass();
protected:
- VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue,
- VKFence& fence);
+ VkDescriptorSet CommitDescriptorSet(VKUpdateDescriptorQueue& update_descriptor_queue);
vk::DescriptorUpdateTemplateKHR descriptor_template;
vk::PipelineLayout layout;
diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
index ed9d2991c..9be72dc9b 100644
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -32,7 +32,7 @@ VkDescriptorSet VKComputePipeline::CommitDescriptorSet() {
if (!descriptor_template) {
return {};
}
- const auto set = descriptor_allocator.Commit(scheduler.GetFence());
+ const VkDescriptorSet set = descriptor_allocator.Commit();
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
index ac4a0884e..f38e089d5 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp
@@ -7,7 +7,8 @@
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
@@ -15,14 +16,15 @@ namespace Vulkan {
// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines.
constexpr std::size_t SETS_GROW_RATE = 0x20;
-DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool,
- VkDescriptorSetLayout layout)
- : VKFencedPool{SETS_GROW_RATE}, descriptor_pool{descriptor_pool}, layout{layout} {}
+DescriptorAllocator::DescriptorAllocator(VKDescriptorPool& descriptor_pool_,
+ VkDescriptorSetLayout layout_)
+ : ResourcePool(descriptor_pool_.master_semaphore, SETS_GROW_RATE),
+ descriptor_pool{descriptor_pool_}, layout{layout_} {}
DescriptorAllocator::~DescriptorAllocator() = default;
-VkDescriptorSet DescriptorAllocator::Commit(VKFence& fence) {
- const std::size_t index = CommitResource(fence);
+VkDescriptorSet DescriptorAllocator::Commit() {
+ const std::size_t index = CommitResource();
return descriptors_allocations[index / SETS_GROW_RATE][index % SETS_GROW_RATE];
}
@@ -30,8 +32,9 @@ void DescriptorAllocator::Allocate(std::size_t begin, std::size_t end) {
descriptors_allocations.push_back(descriptor_pool.AllocateDescriptors(layout, end - begin));
}
-VKDescriptorPool::VKDescriptorPool(const VKDevice& device)
- : device{device}, active_pool{AllocateNewPool()} {}
+VKDescriptorPool::VKDescriptorPool(const VKDevice& device_, VKScheduler& scheduler)
+ : device{device_}, master_semaphore{scheduler.GetMasterSemaphore()}, active_pool{
+ AllocateNewPool()} {}
VKDescriptorPool::~VKDescriptorPool() = default;
diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.h b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
index 9efa66bef..544f32a20 100644
--- a/src/video_core/renderer_vulkan/vk_descriptor_pool.h
+++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.h
@@ -6,21 +6,24 @@
#include <vector>
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
+class VKDevice;
class VKDescriptorPool;
+class VKScheduler;
-class DescriptorAllocator final : public VKFencedPool {
+class DescriptorAllocator final : public ResourcePool {
public:
explicit DescriptorAllocator(VKDescriptorPool& descriptor_pool, VkDescriptorSetLayout layout);
~DescriptorAllocator() override;
+ DescriptorAllocator& operator=(const DescriptorAllocator&) = delete;
DescriptorAllocator(const DescriptorAllocator&) = delete;
- VkDescriptorSet Commit(VKFence& fence);
+ VkDescriptorSet Commit();
protected:
void Allocate(std::size_t begin, std::size_t end) override;
@@ -36,15 +39,19 @@ class VKDescriptorPool final {
friend DescriptorAllocator;
public:
- explicit VKDescriptorPool(const VKDevice& device);
+ explicit VKDescriptorPool(const VKDevice& device, VKScheduler& scheduler);
~VKDescriptorPool();
+ VKDescriptorPool(const VKDescriptorPool&) = delete;
+ VKDescriptorPool& operator=(const VKDescriptorPool&) = delete;
+
private:
vk::DescriptorPool* AllocateNewPool();
vk::DescriptorSets AllocateDescriptors(VkDescriptorSetLayout layout, std::size_t count);
const VKDevice& device;
+ MasterSemaphore& master_semaphore;
std::vector<vk::DescriptorPool> pools;
vk::DescriptorPool* active_pool;
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 90916ee0e..e1217ca83 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -45,6 +45,7 @@ constexpr std::array REQUIRED_EXTENSIONS{
VK_KHR_8BIT_STORAGE_EXTENSION_NAME,
VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
+ VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
@@ -81,6 +82,21 @@ VkFormatFeatureFlags GetFormatFeatures(VkFormatProperties properties, FormatType
}
}
+[[nodiscard]] bool IsRDNA(std::string_view device_name, VkDriverIdKHR driver_id) {
+ static constexpr std::array RDNA_DEVICES{
+ "5700",
+ "5600",
+ "5500",
+ "5300",
+ };
+ if (driver_id != VK_DRIVER_ID_AMD_PROPRIETARY_KHR) {
+ return false;
+ }
+ return std::any_of(RDNA_DEVICES.begin(), RDNA_DEVICES.end(), [device_name](const char* name) {
+ return device_name.find(name) != std::string_view::npos;
+ });
+}
+
std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
vk::PhysicalDevice physical, const vk::InstanceDispatch& dld) {
static constexpr std::array formats{
@@ -253,6 +269,13 @@ bool VKDevice::Create() {
.inheritedQueries = false,
};
+ VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR,
+ .pNext = nullptr,
+ .timelineSemaphore = true,
+ };
+ SetNext(next, timeline_semaphore);
+
VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR,
.pNext = nullptr,
@@ -383,6 +406,15 @@ bool VKDevice::Create() {
CollectTelemetryParameters();
+ if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) {
+ // AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it
+ // seems to cause stability issues
+ LOG_WARNING(
+ Render_Vulkan,
+ "Blacklisting AMD proprietary on RDNA devices from VK_EXT_extended_dynamic_state");
+ ext_extended_dynamic_state = false;
+ }
+
graphics_queue = logical.GetQueue(graphics_family);
present_queue = logical.GetQueue(present_family);
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.cpp b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
index d7f65d435..5babbdd0b 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.cpp
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.cpp
@@ -29,8 +29,8 @@ void InnerFence::Queue() {
}
ASSERT(!event);
- event = device.GetLogical().CreateNewEvent();
- ticks = scheduler.Ticks();
+ event = device.GetLogical().CreateEvent();
+ ticks = scheduler.CurrentTick();
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) {
@@ -52,7 +52,7 @@ void InnerFence::Wait() {
}
ASSERT(event);
- if (ticks >= scheduler.Ticks()) {
+ if (ticks >= scheduler.CurrentTick()) {
scheduler.Flush();
}
while (!IsEventSignalled()) {
@@ -71,12 +71,12 @@ bool InnerFence::IsEventSignalled() const {
}
}
-VKFenceManager::VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const VKDevice& device, VKScheduler& scheduler,
- VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
- VKQueryCache& query_cache)
- : GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache),
- device{device}, scheduler{scheduler} {}
+VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+ Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache,
+ VKBufferCache& buffer_cache, VKQueryCache& query_cache,
+ const VKDevice& device_, VKScheduler& scheduler_)
+ : GenericFenceManager(rasterizer, gpu, texture_cache, buffer_cache, query_cache),
+ device{device_}, scheduler{scheduler_} {}
Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) {
return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed);
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 043fe7947..1547d6d30 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -55,10 +55,10 @@ using GenericFenceManager =
class VKFenceManager final : public GenericFenceManager {
public:
- explicit VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const VKDevice& device, VKScheduler& scheduler,
- VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
- VKQueryCache& query_cache);
+ explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer, Tegra::GPU& gpu,
+ Tegra::MemoryManager& memory_manager, VKTextureCache& texture_cache,
+ VKBufferCache& buffer_cache, VKQueryCache& query_cache,
+ const VKDevice& device, VKScheduler& scheduler);
protected:
Fence CreateFence(u32 value, bool is_stubbed) override;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 2e46c6278..696eaeb5f 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -93,7 +93,7 @@ VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
if (!descriptor_template) {
return {};
}
- const auto set = descriptor_allocator.Commit(scheduler.GetFence());
+ const VkDescriptorSet set = descriptor_allocator.Commit();
update_descriptor_queue.Send(*descriptor_template, set);
return set;
}
@@ -261,12 +261,12 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
vertex_input_ci.pNext = &input_divisor_ci;
}
- const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, dynamic.Topology());
+ const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology);
const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
- .topology = MaxwellToVK::PrimitiveTopology(device, dynamic.Topology()),
+ .topology = MaxwellToVK::PrimitiveTopology(device, state.topology),
.primitiveRestartEnable = state.primitive_restart_enable != 0 &&
SupportsPrimitiveRestart(input_assembly_topology),
};
@@ -400,7 +400,6 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
static constexpr std::array extended{
VK_DYNAMIC_STATE_CULL_MODE_EXT,
VK_DYNAMIC_STATE_FRONT_FACE_EXT,
- VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT,
VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT,
VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT,
VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT,
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.cpp b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
new file mode 100644
index 000000000..ae26e558d
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.cpp
@@ -0,0 +1,56 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <atomic>
+#include <chrono>
+
+#include "core/settings.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
+#include "video_core/renderer_vulkan/wrapper.h"
+
+namespace Vulkan {
+
+using namespace std::chrono_literals;
+
+MasterSemaphore::MasterSemaphore(const VKDevice& device) {
+ static constexpr VkSemaphoreTypeCreateInfoKHR semaphore_type_ci{
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR,
+ .pNext = nullptr,
+ .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR,
+ .initialValue = 0,
+ };
+ static constexpr VkSemaphoreCreateInfo semaphore_ci{
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+ .pNext = &semaphore_type_ci,
+ .flags = 0,
+ };
+ semaphore = device.GetLogical().CreateSemaphore(semaphore_ci);
+
+ if (!Settings::values.renderer_debug) {
+ return;
+ }
+ // Validation layers have a bug where they fail to track resource usage when using timeline
+ // semaphores and synchronizing with GetSemaphoreCounterValueKHR. To workaround this issue, have
+ // a separate thread waiting for each timeline semaphore value.
+ debug_thread = std::thread([this] {
+ u64 counter = 0;
+ while (!shutdown) {
+ if (semaphore.Wait(counter, 10'000'000)) {
+ ++counter;
+ }
+ }
+ });
+}
+
+MasterSemaphore::~MasterSemaphore() {
+ shutdown = true;
+
+ // This thread might not be started
+ if (debug_thread.joinable()) {
+ debug_thread.join();
+ }
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_master_semaphore.h b/src/video_core/renderer_vulkan/vk_master_semaphore.h
new file mode 100644
index 000000000..0e93706d7
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_master_semaphore.h
@@ -0,0 +1,70 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+#include <thread>
+
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/wrapper.h"
+
+namespace Vulkan {
+
+class VKDevice;
+
+class MasterSemaphore {
+public:
+ explicit MasterSemaphore(const VKDevice& device);
+ ~MasterSemaphore();
+
+ /// Returns the current logical tick.
+ [[nodiscard]] u64 CurrentTick() const noexcept {
+ return current_tick;
+ }
+
+ /// Returns the timeline semaphore handle.
+ [[nodiscard]] VkSemaphore Handle() const noexcept {
+ return *semaphore;
+ }
+
+ /// Returns true when a tick has been hit by the GPU.
+ [[nodiscard]] bool IsFree(u64 tick) {
+ return gpu_tick >= tick;
+ }
+
+ /// Advance to the logical tick.
+ void NextTick() noexcept {
+ ++current_tick;
+ }
+
+ /// Refresh the known GPU tick
+ void Refresh() {
+ gpu_tick = semaphore.GetCounter();
+ }
+
+ /// Waits for a tick to be hit on the GPU
+ void Wait(u64 tick) {
+ // No need to wait if the GPU is ahead of the tick
+ if (IsFree(tick)) {
+ return;
+ }
+ // Update the GPU tick and try again
+ Refresh();
+ if (IsFree(tick)) {
+ return;
+ }
+ // If none of the above is hit, fallback to a regular wait
+ semaphore.Wait(tick);
+ }
+
+private:
+ vk::Semaphore semaphore; ///< Timeline semaphore.
+ std::atomic<u64> gpu_tick{0}; ///< Current known GPU tick.
+ std::atomic<u64> current_tick{1}; ///< Current logical tick.
+ std::atomic<bool> shutdown{false}; ///< True when the object is being destroyed.
+ std::thread debug_thread; ///< Debug thread to workaround validation layer bugs.
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index cfdcdd6ab..dedc9c466 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -135,64 +135,56 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
return std::memcmp(&rhs, this, sizeof *this) == 0;
}
-Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
- VideoCommon::Shader::ProgramCode program_code, u32 main_offset)
- : gpu_addr{gpu_addr}, program_code{std::move(program_code)},
- registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
- compiler_settings, registry},
- entries{GenerateShaderEntries(shader_ir)} {}
+Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine, Tegra::Engines::ShaderType stage,
+ GPUVAddr gpu_addr_, VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code_,
+ u32 main_offset)
+ : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage, engine),
+ shader_ir(program_code, main_offset, compiler_settings, registry),
+ entries(GenerateShaderEntries(shader_ir)) {}
Shader::~Shader() = default;
-Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system,
- Tegra::Engines::ShaderType stage) {
- if (stage == ShaderType::Compute) {
- return system.GPU().KeplerCompute();
- } else {
- return system.GPU().Maxwell3D();
- }
-}
-
-VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
- const VKDevice& device, VKScheduler& scheduler,
- VKDescriptorPool& descriptor_pool,
- VKUpdateDescriptorQueue& update_descriptor_queue,
- VKRenderPassCache& renderpass_cache)
- : VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device},
- scheduler{scheduler}, descriptor_pool{descriptor_pool},
- update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {}
+VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu_,
+ Tegra::Engines::Maxwell3D& maxwell3d_,
+ Tegra::Engines::KeplerCompute& kepler_compute_,
+ Tegra::MemoryManager& gpu_memory_, const VKDevice& device_,
+ VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
+ VKUpdateDescriptorQueue& update_descriptor_queue_,
+ VKRenderPassCache& renderpass_cache_)
+ : VideoCommon::ShaderCache<Shader>{rasterizer}, gpu{gpu_}, maxwell3d{maxwell3d_},
+ kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
+ scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
+ update_descriptor_queue{update_descriptor_queue_}, renderpass_cache{renderpass_cache_} {}
VKPipelineCache::~VKPipelineCache() = default;
std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
- const auto& gpu = system.GPU().Maxwell3D();
-
std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
+
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
// Skip stages that are not enabled
- if (!gpu.regs.IsShaderConfigEnabled(index)) {
+ if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
continue;
}
- auto& memory_manager{system.GPU().MemoryManager()};
- const GPUVAddr program_addr{GetShaderAddress(system, program)};
- const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+ const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)};
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
if (!result) {
- const auto host_ptr{memory_manager.GetPointer(program_addr)};
+ const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)};
// No shader found - create a new one
- constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
+ static constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
- ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
+ ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false);
const std::size_t size_in_bytes = code.size() * sizeof(u64);
- auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code),
- stage_offset);
+ auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, *cpu_addr,
+ std::move(code), stage_offset);
result = shader.get();
if (cpu_addr) {
@@ -215,11 +207,11 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
}
last_graphics_key = key;
- if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(system.GPU())) {
+ if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) {
std::unique_lock lock{pipeline_cache};
const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
if (is_cache_miss) {
- system.GPU().ShaderNotify().MarkSharderBuilding();
+ gpu.ShaderNotify().MarkSharderBuilding();
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
const auto [program, bindings] = DecompileShaders(key.fixed_state);
async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
@@ -233,13 +225,13 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
auto& entry = pair->second;
if (is_cache_miss) {
- system.GPU().ShaderNotify().MarkSharderBuilding();
+ gpu.ShaderNotify().MarkSharderBuilding();
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
const auto [program, bindings] = DecompileShaders(key.fixed_state);
entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
update_descriptor_queue, renderpass_cache, key,
bindings, program);
- system.GPU().ShaderNotify().MarkShaderComplete();
+ gpu.ShaderNotify().MarkShaderComplete();
}
last_graphics_pipeline = entry.get();
return last_graphics_pipeline;
@@ -255,22 +247,21 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
}
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
- auto& memory_manager = system.GPU().MemoryManager();
- const auto program_addr = key.shader;
+ const GPUVAddr gpu_addr = key.shader;
- const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
ASSERT(cpu_addr);
Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
if (!shader) {
// No shader found - create a new one
- const auto host_ptr = memory_manager.GetPointer(program_addr);
+ const auto host_ptr = gpu_memory.GetPointer(gpu_addr);
- ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
+ ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true);
const std::size_t size_in_bytes = code.size() * sizeof(u64);
- auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr,
- std::move(code), KERNEL_MAIN_OFFSET);
+ auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr,
+ *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET);
shader = shader_info.get();
if (cpu_addr) {
@@ -298,7 +289,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
}
void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) {
- system.GPU().ShaderNotify().MarkShaderComplete();
+ gpu.ShaderNotify().MarkShaderComplete();
std::unique_lock lock{pipeline_cache};
graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline);
}
@@ -339,12 +330,8 @@ void VKPipelineCache::OnShaderRemoval(Shader* shader) {
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
- auto& memory_manager = system.GPU().MemoryManager();
- const auto& gpu = system.GPU().Maxwell3D();
-
Specialization specialization;
- if (fixed_state.dynamic_state.Topology() == Maxwell::PrimitiveTopology::Points ||
- device.IsExtExtendedDynamicStateSupported()) {
+ if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) {
float point_size;
std::memcpy(&point_size, &fixed_state.point_size, sizeof(float));
specialization.point_size = point_size;
@@ -364,12 +351,12 @@ VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
// Skip stages that are not enabled
- if (!gpu.regs.IsShaderConfigEnabled(index)) {
+ if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
continue;
}
- const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
- const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
+ const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index c04829e77..e558e6658 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -38,7 +38,6 @@ class RasterizerVulkan;
class VKComputePipeline;
class VKDescriptorPool;
class VKDevice;
-class VKFence;
class VKScheduler;
class VKUpdateDescriptorQueue;
@@ -85,7 +84,8 @@ namespace Vulkan {
class Shader {
public:
- explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
+ explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine,
+ Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, VAddr cpu_addr,
VideoCommon::Shader::ProgramCode program_code, u32 main_offset);
~Shader();
@@ -97,22 +97,19 @@ public:
return shader_ir;
}
- const VideoCommon::Shader::Registry& GetRegistry() const {
- return registry;
- }
-
const VideoCommon::Shader::ShaderIR& GetIR() const {
return shader_ir;
}
+ const VideoCommon::Shader::Registry& GetRegistry() const {
+ return registry;
+ }
+
const ShaderEntries& GetEntries() const {
return entries;
}
private:
- static Tegra::Engines::ConstBufferEngineInterface& GetEngine(Core::System& system,
- Tegra::Engines::ShaderType stage);
-
GPUVAddr gpu_addr{};
VideoCommon::Shader::ProgramCode program_code;
VideoCommon::Shader::Registry registry;
@@ -122,9 +119,11 @@ private:
class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
public:
- explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
- const VKDevice& device, VKScheduler& scheduler,
- VKDescriptorPool& descriptor_pool,
+ explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
+ Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::Engines::KeplerCompute& kepler_compute,
+ Tegra::MemoryManager& gpu_memory, const VKDevice& device,
+ VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue,
VKRenderPassCache& renderpass_cache);
~VKPipelineCache() override;
@@ -145,7 +144,11 @@ private:
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
const FixedPipelineState& fixed_state);
- Core::System& system;
+ Tegra::GPU& gpu;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+ Tegra::MemoryManager& gpu_memory;
+
const VKDevice& device;
VKScheduler& scheduler;
VKDescriptorPool& descriptor_pool;
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 6cd63d090..ee2d871e3 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -9,35 +9,33 @@
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
+using VideoCore::QueryType;
+
namespace {
constexpr std::array QUERY_TARGETS = {VK_QUERY_TYPE_OCCLUSION};
-constexpr VkQueryType GetTarget(VideoCore::QueryType type) {
+constexpr VkQueryType GetTarget(QueryType type) {
return QUERY_TARGETS[static_cast<std::size_t>(type)];
}
} // Anonymous namespace
-QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {}
+QueryPool::QueryPool(const VKDevice& device_, VKScheduler& scheduler, QueryType type_)
+ : ResourcePool{scheduler.GetMasterSemaphore(), GROW_STEP}, device{device_}, type{type_} {}
QueryPool::~QueryPool() = default;
-void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) {
- device = &device_;
- type = type_;
-}
-
-std::pair<VkQueryPool, u32> QueryPool::Commit(VKFence& fence) {
+std::pair<VkQueryPool, u32> QueryPool::Commit() {
std::size_t index;
do {
- index = CommitResource(fence);
+ index = CommitResource();
} while (usage[index]);
usage[index] = true;
@@ -47,7 +45,7 @@ std::pair<VkQueryPool, u32> QueryPool::Commit(VKFence& fence) {
void QueryPool::Allocate(std::size_t begin, std::size_t end) {
usage.resize(end);
- pools.push_back(device->GetLogical().CreateQueryPool({
+ pools.push_back(device.GetLogical().CreateQueryPool({
.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@@ -68,30 +66,39 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
}
-VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+VKQueryCache::VKQueryCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory,
const VKDevice& device, VKScheduler& scheduler)
- : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
- QueryPool>{system, rasterizer},
- device{device}, scheduler{scheduler} {
- for (std::size_t i = 0; i < static_cast<std::size_t>(VideoCore::NumQueryTypes); ++i) {
- query_pools[i].Initialize(device, static_cast<VideoCore::QueryType>(i));
+ : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream,
+ HostCounter>{rasterizer, maxwell3d, gpu_memory},
+ device{device}, scheduler{scheduler}, query_pools{
+ QueryPool{device, scheduler,
+ QueryType::SamplesPassed},
+ } {}
+
+VKQueryCache::~VKQueryCache() {
+ // TODO(Rodrigo): This is a hack to destroy all HostCounter instances before the base class
+ // destructor is called. The query cache should be redesigned to have a proper ownership model
+ // instead of using shared pointers.
+ for (size_t query_type = 0; query_type < VideoCore::NumQueryTypes; ++query_type) {
+ auto& stream = Stream(static_cast<QueryType>(query_type));
+ stream.Update(false);
+ stream.Reset();
}
}
-VKQueryCache::~VKQueryCache() = default;
-
-std::pair<VkQueryPool, u32> VKQueryCache::AllocateQuery(VideoCore::QueryType type) {
- return query_pools[static_cast<std::size_t>(type)].Commit(scheduler.GetFence());
+std::pair<VkQueryPool, u32> VKQueryCache::AllocateQuery(QueryType type) {
+ return query_pools[static_cast<std::size_t>(type)].Commit();
}
-void VKQueryCache::Reserve(VideoCore::QueryType type, std::pair<VkQueryPool, u32> query) {
+void VKQueryCache::Reserve(QueryType type, std::pair<VkQueryPool, u32> query) {
query_pools[static_cast<std::size_t>(type)].Reserve(query);
}
HostCounter::HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
- VideoCore::QueryType type)
+ QueryType type)
: VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency)}, cache{cache},
- type{type}, query{cache.AllocateQuery(type)}, ticks{cache.Scheduler().Ticks()} {
+ type{type}, query{cache.AllocateQuery(type)}, tick{cache.Scheduler().CurrentTick()} {
const vk::Device* logical = &cache.Device().GetLogical();
cache.Scheduler().Record([logical, query = query](vk::CommandBuffer cmdbuf) {
logical->ResetQueryPoolEXT(query.first, query.second, 1);
@@ -109,7 +116,7 @@ void HostCounter::EndQuery() {
}
u64 HostCounter::BlockingQuery() const {
- if (ticks >= cache.Scheduler().Ticks()) {
+ if (tick >= cache.Scheduler().CurrentTick()) {
cache.Scheduler().Flush();
}
u64 data;
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.h b/src/video_core/renderer_vulkan/vk_query_cache.h
index 40119e6d3..2e57fb75d 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.h
+++ b/src/video_core/renderer_vulkan/vk_query_cache.h
@@ -11,7 +11,7 @@
#include "common/common_types.h"
#include "video_core/query_cache.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace VideoCore {
@@ -28,14 +28,12 @@ class VKScheduler;
using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>;
-class QueryPool final : public VKFencedPool {
+class QueryPool final : public ResourcePool {
public:
- explicit QueryPool();
+ explicit QueryPool(const VKDevice& device, VKScheduler& scheduler, VideoCore::QueryType type);
~QueryPool() override;
- void Initialize(const VKDevice& device, VideoCore::QueryType type);
-
- std::pair<VkQueryPool, u32> Commit(VKFence& fence);
+ std::pair<VkQueryPool, u32> Commit();
void Reserve(std::pair<VkQueryPool, u32> query);
@@ -45,18 +43,18 @@ protected:
private:
static constexpr std::size_t GROW_STEP = 512;
- const VKDevice* device = nullptr;
- VideoCore::QueryType type = {};
+ const VKDevice& device;
+ const VideoCore::QueryType type;
std::vector<vk::QueryPool> pools;
std::vector<bool> usage;
};
class VKQueryCache final
- : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
- QueryPool> {
+ : public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter> {
public:
- explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
+ explicit VKQueryCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory,
const VKDevice& device, VKScheduler& scheduler);
~VKQueryCache();
@@ -75,6 +73,7 @@ public:
private:
const VKDevice& device;
VKScheduler& scheduler;
+ std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
};
class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> {
@@ -91,7 +90,7 @@ private:
VKQueryCache& cache;
const VideoCore::QueryType type;
const std::pair<VkQueryPool, u32> query;
- const u64 ticks;
+ const u64 tick;
};
class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 936f76195..e0fb8693f 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -31,7 +31,6 @@
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_sampler_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@@ -381,28 +380,28 @@ void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf) const {
}
}
-RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer,
- VKScreenInfo& screen_info, const VKDevice& device,
- VKResourceManager& resource_manager,
- VKMemoryManager& memory_manager, StateTracker& state_tracker,
- VKScheduler& scheduler)
- : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer},
- screen_info{screen_info}, device{device}, resource_manager{resource_manager},
- memory_manager{memory_manager}, state_tracker{state_tracker}, scheduler{scheduler},
- staging_pool(device, memory_manager, scheduler), descriptor_pool(device),
- update_descriptor_queue(device, scheduler), renderpass_cache(device),
+RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu_,
+ Tegra::MemoryManager& gpu_memory_,
+ Core::Memory::Memory& cpu_memory, VKScreenInfo& screen_info_,
+ const VKDevice& device_, VKMemoryManager& memory_manager_,
+ StateTracker& state_tracker_, VKScheduler& scheduler_)
+ : RasterizerAccelerated(cpu_memory), gpu(gpu_), gpu_memory(gpu_memory_),
+ maxwell3d(gpu.Maxwell3D()), kepler_compute(gpu.KeplerCompute()), screen_info(screen_info_),
+ device(device_), memory_manager(memory_manager_), state_tracker(state_tracker_),
+ scheduler(scheduler_), staging_pool(device, memory_manager, scheduler),
+ descriptor_pool(device, scheduler_), update_descriptor_queue(device, scheduler),
+ renderpass_cache(device),
quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue),
- texture_cache(system, *this, device, resource_manager, memory_manager, scheduler,
- staging_pool),
- pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
- renderpass_cache),
- buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
- sampler_cache(device),
- fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
- query_cache(system, *this, device, scheduler),
- wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} {
+ texture_cache(*this, maxwell3d, gpu_memory, device, memory_manager, scheduler, staging_pool),
+ pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
+ descriptor_pool, update_descriptor_queue, renderpass_cache),
+ buffer_cache(*this, gpu_memory, cpu_memory, device, memory_manager, scheduler, staging_pool),
+ sampler_cache(device), query_cache(*this, maxwell3d, gpu_memory, device, scheduler),
+ fence_manager(*this, gpu, gpu_memory, texture_cache, buffer_cache, query_cache, device,
+ scheduler),
+ wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window) {
scheduler.SetQueryCache(query_cache);
if (device.UseAsynchronousShaders()) {
async_shaders.AllocateWorkers();
@@ -414,15 +413,13 @@ RasterizerVulkan::~RasterizerVulkan() = default;
void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
MICROPROFILE_SCOPE(Vulkan_Drawing);
+ SCOPE_EXIT({ gpu.TickWork(); });
FlushWork();
query_cache.UpdateCounters();
- SCOPE_EXIT({ system.GPU().TickWork(); });
-
- const auto& gpu = system.GPU().Maxwell3D();
GraphicsPipelineCacheKey key;
- key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported());
+ key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported());
buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
@@ -480,8 +477,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
void RasterizerVulkan::Clear() {
MICROPROFILE_SCOPE(Vulkan_Clearing);
- const auto& gpu = system.GPU().Maxwell3D();
- if (!system.GPU().Maxwell3D().ShouldExecute()) {
+ if (!maxwell3d.ShouldExecute()) {
return;
}
@@ -490,7 +486,7 @@ void RasterizerVulkan::Clear() {
query_cache.UpdateCounters();
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A;
const bool use_depth = regs.clear_buffers.Z;
@@ -559,7 +555,7 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
query_cache.UpdateCounters();
- const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ const auto& launch_desc = kepler_compute.launch_description;
auto& pipeline = pipeline_cache.GetComputePipeline({
.shader = code_addr,
.shared_memory_size = launch_desc.shared_alloc,
@@ -655,16 +651,14 @@ void RasterizerVulkan::SyncGuestHost() {
}
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
- gpu.MemoryManager().Write<u32>(addr, value);
+ gpu_memory.Write<u32>(addr, value);
return;
}
fence_manager.SignalSemaphore(addr, value);
}
void RasterizerVulkan::SignalSyncPoint(u32 value) {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
gpu.IncrementSyncPoint(value);
return;
@@ -673,7 +667,6 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) {
}
void RasterizerVulkan::ReleaseFences() {
- auto& gpu{system.GPU()};
if (!gpu.IsAsync()) {
return;
}
@@ -751,10 +744,6 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config,
return true;
}
-void RasterizerVulkan::SetupDirtyFlags() {
- state_tracker.Initialize();
-}
-
void RasterizerVulkan::FlushWork() {
static constexpr u32 DRAWS_TO_DISPATCH = 4096;
@@ -778,10 +767,9 @@ void RasterizerVulkan::FlushWork() {
RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) {
MICROPROFILE_SCOPE(Vulkan_RenderTargets);
- auto& maxwell3d = system.GPU().Maxwell3D();
- auto& dirty = maxwell3d.dirty.flags;
- auto& regs = maxwell3d.regs;
+ const auto& regs = maxwell3d.regs;
+ auto& dirty = maxwell3d.dirty.flags;
const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];
dirty[VideoCommon::Dirty::RenderTargets] = false;
@@ -844,7 +832,7 @@ std::tuple<VkFramebuffer, VkExtent2D> RasterizerVulkan::ConfigureFramebuffers(
return true;
};
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
for (std::size_t index = 0; index < num_attachments; ++index) {
if (try_push(color_attachments[index])) {
@@ -880,13 +868,12 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
bool is_instanced) {
MICROPROFILE_SCOPE(Vulkan_Geometry);
- const auto& gpu = system.GPU().Maxwell3D();
- const auto& regs = gpu.regs;
+ const auto& regs = maxwell3d.regs;
SetupVertexArrays(buffer_bindings);
const u32 base_instance = regs.vb_base_instance;
- const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1;
+ const u32 num_instances = is_instanced ? maxwell3d.mme_draw.instance_count : 1;
const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first;
const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count;
@@ -947,7 +934,7 @@ void RasterizerVulkan::SetupImageTransitions(
}
void RasterizerVulkan::UpdateDynamicStates() {
- auto& regs = system.GPU().Maxwell3D().regs;
+ auto& regs = maxwell3d.regs;
UpdateViewportsState(regs);
UpdateScissorsState(regs);
UpdateDepthBias(regs);
@@ -961,14 +948,13 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateDepthWriteEnable(regs);
UpdateDepthCompareOp(regs);
UpdateFrontFace(regs);
- UpdatePrimitiveTopology(regs);
UpdateStencilOp(regs);
UpdateStencilTestEnable(regs);
}
}
void RasterizerVulkan::BeginTransformFeedback() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) {
return;
}
@@ -1000,7 +986,7 @@ void RasterizerVulkan::BeginTransformFeedback() {
}
void RasterizerVulkan::EndTransformFeedback() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
if (regs.tfb_enabled == 0) {
return;
}
@@ -1013,7 +999,7 @@ void RasterizerVulkan::EndTransformFeedback() {
}
void RasterizerVulkan::SetupVertexArrays(BufferBindings& buffer_bindings) {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
const auto& vertex_array = regs.vertex_array[index];
@@ -1039,7 +1025,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
if (params.num_vertices == 0) {
return;
}
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
switch (regs.draw.topology) {
case Maxwell::PrimitiveTopology::Quads: {
if (!params.is_indexed) {
@@ -1087,8 +1073,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
- const auto& gpu = system.GPU().Maxwell3D();
- const auto& shader_stage = gpu.state.shader_stages[stage];
+ const auto& shader_stage = maxwell3d.state.shader_stages[stage];
for (const auto& entry : entries.const_buffers) {
SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]);
}
@@ -1096,8 +1081,7 @@ void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, s
void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
- auto& gpu{system.GPU()};
- const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage]};
+ const auto& cbufs{maxwell3d.state.shader_stages[stage]};
for (const auto& entry : entries.global_buffers) {
const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset();
@@ -1107,19 +1091,17 @@ void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries,
void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().Maxwell3D();
for (const auto& entry : entries.uniform_texels) {
- const auto image = GetTextureInfo(gpu, entry, stage).tic;
+ const auto image = GetTextureInfo(maxwell3d, entry, stage).tic;
SetupUniformTexels(image, entry);
}
}
void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().Maxwell3D();
for (const auto& entry : entries.samplers) {
for (std::size_t i = 0; i < entry.size; ++i) {
- const auto texture = GetTextureInfo(gpu, entry, stage, i);
+ const auto texture = GetTextureInfo(maxwell3d, entry, stage, i);
SetupTexture(texture, entry);
}
}
@@ -1127,25 +1109,23 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::
void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().Maxwell3D();
for (const auto& entry : entries.storage_texels) {
- const auto image = GetTextureInfo(gpu, entry, stage).tic;
+ const auto image = GetTextureInfo(maxwell3d, entry, stage).tic;
SetupStorageTexel(image, entry);
}
}
void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Images);
- const auto& gpu = system.GPU().Maxwell3D();
for (const auto& entry : entries.images) {
- const auto tic = GetTextureInfo(gpu, entry, stage).tic;
+ const auto tic = GetTextureInfo(maxwell3d, entry, stage).tic;
SetupImage(tic, entry);
}
}
void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_ConstBuffers);
- const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+ const auto& launch_desc = kepler_compute.launch_description;
for (const auto& entry : entries.const_buffers) {
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
@@ -1159,7 +1139,7 @@ void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) {
void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_GlobalBuffers);
- const auto cbufs{system.GPU().KeplerCompute().launch_description.const_buffer_config};
+ const auto& cbufs{kepler_compute.launch_description.const_buffer_config};
for (const auto& entry : entries.global_buffers) {
const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
SetupGlobalBuffer(entry, addr);
@@ -1168,19 +1148,17 @@ void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) {
void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().KeplerCompute();
for (const auto& entry : entries.uniform_texels) {
- const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
+ const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic;
SetupUniformTexels(image, entry);
}
}
void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().KeplerCompute();
for (const auto& entry : entries.samplers) {
for (std::size_t i = 0; i < entry.size; ++i) {
- const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex, i);
+ const auto texture = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex, i);
SetupTexture(texture, entry);
}
}
@@ -1188,18 +1166,16 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Textures);
- const auto& gpu = system.GPU().KeplerCompute();
for (const auto& entry : entries.storage_texels) {
- const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
+ const auto image = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic;
SetupStorageTexel(image, entry);
}
}
void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Images);
- const auto& gpu = system.GPU().KeplerCompute();
for (const auto& entry : entries.images) {
- const auto tic = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic;
+ const auto tic = GetTextureInfo(kepler_compute, entry, ComputeShaderIndex).tic;
SetupImage(tic, entry);
}
}
@@ -1223,9 +1199,8 @@ void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry,
}
void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) {
- auto& memory_manager{system.GPU().MemoryManager()};
- const auto actual_addr = memory_manager.Read<u64>(address);
- const auto size = memory_manager.Read<u32>(address + 8);
+ const u64 actual_addr = gpu_memory.Read<u64>(address);
+ const u32 size = gpu_memory.Read<u32>(address + 8);
if (size == 0) {
// Sometimes global memory pointers don't have a proper size. Upload a dummy entry
@@ -1442,16 +1417,6 @@ void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) {
[front_face](vk::CommandBuffer cmdbuf) { cmdbuf.SetFrontFaceEXT(front_face); });
}
-void RasterizerVulkan::UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs) {
- if (!state_tracker.TouchPrimitiveTopology()) {
- return;
- }
- const Maxwell::PrimitiveTopology primitive_topology = regs.draw.topology.Value();
- scheduler.Record([this, primitive_topology](vk::CommandBuffer cmdbuf) {
- cmdbuf.SetPrimitiveTopologyEXT(MaxwellToVK::PrimitiveTopology(device, primitive_topology));
- });
-}
-
void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchStencilOp()) {
return;
@@ -1508,7 +1473,7 @@ std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const {
}
std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
std::size_t size = 0;
for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
@@ -1523,9 +1488,8 @@ std::size_t RasterizerVulkan::CalculateVertexArraysSize() const {
}
std::size_t RasterizerVulkan::CalculateIndexBufferSize() const {
- const auto& regs = system.GPU().Maxwell3D().regs;
- return static_cast<std::size_t>(regs.index_array.count) *
- static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
+ return static_cast<std::size_t>(maxwell3d.regs.index_array.count) *
+ static_cast<std::size_t>(maxwell3d.regs.index_array.FormatSizeInBytes());
}
std::size_t RasterizerVulkan::CalculateConstBufferSize(
@@ -1540,7 +1504,7 @@ std::size_t RasterizerVulkan::CalculateConstBufferSize(
}
RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ const auto& regs = maxwell3d.regs;
const std::size_t num_attachments = static_cast<std::size_t>(regs.rt_control.count);
RenderPassParams params;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index f640ba649..237e51fa4 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -25,7 +25,6 @@
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_sampler_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@@ -106,10 +105,11 @@ struct ImageView {
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
public:
- explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
+ explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
+ Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
VKScreenInfo& screen_info, const VKDevice& device,
- VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
- StateTracker& state_tracker, VKScheduler& scheduler);
+ VKMemoryManager& memory_manager, StateTracker& state_tracker,
+ VKScheduler& scheduler);
~RasterizerVulkan() override;
void Draw(bool is_indexed, bool is_instanced) override;
@@ -135,7 +135,6 @@ public:
const Tegra::Engines::Fermi2D::Config& copy_config) override;
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
u32 pixel_stride) override;
- void SetupDirtyFlags() override;
VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
return async_shaders;
@@ -260,7 +259,6 @@ private:
void UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs);
- void UpdatePrimitiveTopology(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
@@ -279,11 +277,13 @@ private:
VkBuffer DefaultBuffer();
- Core::System& system;
- Core::Frontend::EmuWindow& render_window;
+ Tegra::GPU& gpu;
+ Tegra::MemoryManager& gpu_memory;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::Engines::KeplerCompute& kepler_compute;
+
VKScreenInfo& screen_info;
const VKDevice& device;
- VKResourceManager& resource_manager;
VKMemoryManager& memory_manager;
StateTracker& state_tracker;
VKScheduler& scheduler;
@@ -300,8 +300,8 @@ private:
VKPipelineCache pipeline_cache;
VKBufferCache buffer_cache;
VKSamplerCache sampler_cache;
- VKFenceManager fence_manager;
VKQueryCache query_cache;
+ VKFenceManager fence_manager;
vk::Buffer default_buffer;
VKMemoryCommit default_buffer_commit;
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.cpp b/src/video_core/renderer_vulkan/vk_resource_manager.cpp
deleted file mode 100644
index f19330a36..000000000
--- a/src/video_core/renderer_vulkan/vk_resource_manager.cpp
+++ /dev/null
@@ -1,311 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <optional>
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
-#include "video_core/renderer_vulkan/wrapper.h"
-
-namespace Vulkan {
-
-namespace {
-
-// TODO(Rodrigo): Fine tune these numbers.
-constexpr std::size_t COMMAND_BUFFER_POOL_SIZE = 0x1000;
-constexpr std::size_t FENCES_GROW_STEP = 0x40;
-
-constexpr VkFenceCreateInfo BuildFenceCreateInfo() {
- return {
- .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
- .pNext = nullptr,
- .flags = 0,
- };
-}
-
-} // Anonymous namespace
-
-class CommandBufferPool final : public VKFencedPool {
-public:
- explicit CommandBufferPool(const VKDevice& device)
- : VKFencedPool(COMMAND_BUFFER_POOL_SIZE), device{device} {}
-
- void Allocate(std::size_t begin, std::size_t end) override {
- // Command buffers are going to be commited, recorded, executed every single usage cycle.
- // They are also going to be reseted when commited.
- Pool& pool = pools.emplace_back();
- pool.handle = device.GetLogical().CreateCommandPool({
- .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
- .pNext = nullptr,
- .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
- VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
- .queueFamilyIndex = device.GetGraphicsFamily(),
- });
- pool.cmdbufs = pool.handle.Allocate(COMMAND_BUFFER_POOL_SIZE);
- }
-
- VkCommandBuffer Commit(VKFence& fence) {
- const std::size_t index = CommitResource(fence);
- const auto pool_index = index / COMMAND_BUFFER_POOL_SIZE;
- const auto sub_index = index % COMMAND_BUFFER_POOL_SIZE;
- return pools[pool_index].cmdbufs[sub_index];
- }
-
-private:
- struct Pool {
- vk::CommandPool handle;
- vk::CommandBuffers cmdbufs;
- };
-
- const VKDevice& device;
- std::vector<Pool> pools;
-};
-
-VKResource::VKResource() = default;
-
-VKResource::~VKResource() = default;
-
-VKFence::VKFence(const VKDevice& device)
- : device{device}, handle{device.GetLogical().CreateFence(BuildFenceCreateInfo())} {}
-
-VKFence::~VKFence() = default;
-
-void VKFence::Wait() {
- switch (const VkResult result = handle.Wait()) {
- case VK_SUCCESS:
- return;
- case VK_ERROR_DEVICE_LOST:
- device.ReportLoss();
- [[fallthrough]];
- default:
- throw vk::Exception(result);
- }
-}
-
-void VKFence::Release() {
- ASSERT(is_owned);
- is_owned = false;
-}
-
-void VKFence::Commit() {
- is_owned = true;
- is_used = true;
-}
-
-bool VKFence::Tick(bool gpu_wait, bool owner_wait) {
- if (!is_used) {
- // If a fence is not used it's always free.
- return true;
- }
- if (is_owned && !owner_wait) {
- // The fence is still being owned (Release has not been called) and ownership wait has
- // not been asked.
- return false;
- }
-
- if (gpu_wait) {
- // Wait for the fence if it has been requested.
- (void)handle.Wait();
- } else {
- if (handle.GetStatus() != VK_SUCCESS) {
- // Vulkan fence is not ready, not much it can do here
- return false;
- }
- }
-
- // Broadcast resources their free state.
- for (auto* resource : protected_resources) {
- resource->OnFenceRemoval(this);
- }
- protected_resources.clear();
-
- // Prepare fence for reusage.
- handle.Reset();
- is_used = false;
- return true;
-}
-
-void VKFence::Protect(VKResource* resource) {
- protected_resources.push_back(resource);
-}
-
-void VKFence::Unprotect(VKResource* resource) {
- const auto it = std::find(protected_resources.begin(), protected_resources.end(), resource);
- ASSERT(it != protected_resources.end());
-
- resource->OnFenceRemoval(this);
- protected_resources.erase(it);
-}
-
-void VKFence::RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept {
- std::replace(std::begin(protected_resources), std::end(protected_resources), old_resource,
- new_resource);
-}
-
-VKFenceWatch::VKFenceWatch() = default;
-
-VKFenceWatch::VKFenceWatch(VKFence& initial_fence) {
- Watch(initial_fence);
-}
-
-VKFenceWatch::VKFenceWatch(VKFenceWatch&& rhs) noexcept {
- fence = std::exchange(rhs.fence, nullptr);
- if (fence) {
- fence->RedirectProtection(&rhs, this);
- }
-}
-
-VKFenceWatch& VKFenceWatch::operator=(VKFenceWatch&& rhs) noexcept {
- fence = std::exchange(rhs.fence, nullptr);
- if (fence) {
- fence->RedirectProtection(&rhs, this);
- }
- return *this;
-}
-
-VKFenceWatch::~VKFenceWatch() {
- if (fence) {
- fence->Unprotect(this);
- }
-}
-
-void VKFenceWatch::Wait() {
- if (fence == nullptr) {
- return;
- }
- fence->Wait();
- fence->Unprotect(this);
-}
-
-void VKFenceWatch::Watch(VKFence& new_fence) {
- Wait();
- fence = &new_fence;
- fence->Protect(this);
-}
-
-bool VKFenceWatch::TryWatch(VKFence& new_fence) {
- if (fence) {
- return false;
- }
- fence = &new_fence;
- fence->Protect(this);
- return true;
-}
-
-void VKFenceWatch::OnFenceRemoval(VKFence* signaling_fence) {
- ASSERT_MSG(signaling_fence == fence, "Removing the wrong fence");
- fence = nullptr;
-}
-
-VKFencedPool::VKFencedPool(std::size_t grow_step) : grow_step{grow_step} {}
-
-VKFencedPool::~VKFencedPool() = default;
-
-std::size_t VKFencedPool::CommitResource(VKFence& fence) {
- const auto Search = [&](std::size_t begin, std::size_t end) -> std::optional<std::size_t> {
- for (std::size_t iterator = begin; iterator < end; ++iterator) {
- if (watches[iterator]->TryWatch(fence)) {
- // The resource is now being watched, a free resource was successfully found.
- return iterator;
- }
- }
- return {};
- };
- // Try to find a free resource from the hinted position to the end.
- auto found = Search(free_iterator, watches.size());
- if (!found) {
- // Search from beginning to the hinted position.
- found = Search(0, free_iterator);
- if (!found) {
- // Both searches failed, the pool is full; handle it.
- const std::size_t free_resource = ManageOverflow();
-
- // Watch will wait for the resource to be free.
- watches[free_resource]->Watch(fence);
- found = free_resource;
- }
- }
- // Free iterator is hinted to the resource after the one that's been commited.
- free_iterator = (*found + 1) % watches.size();
- return *found;
-}
-
-std::size_t VKFencedPool::ManageOverflow() {
- const std::size_t old_capacity = watches.size();
- Grow();
-
- // The last entry is guaranted to be free, since it's the first element of the freshly
- // allocated resources.
- return old_capacity;
-}
-
-void VKFencedPool::Grow() {
- const std::size_t old_capacity = watches.size();
- watches.resize(old_capacity + grow_step);
- std::generate(watches.begin() + old_capacity, watches.end(),
- []() { return std::make_unique<VKFenceWatch>(); });
- Allocate(old_capacity, old_capacity + grow_step);
-}
-
-VKResourceManager::VKResourceManager(const VKDevice& device) : device{device} {
- GrowFences(FENCES_GROW_STEP);
- command_buffer_pool = std::make_unique<CommandBufferPool>(device);
-}
-
-VKResourceManager::~VKResourceManager() = default;
-
-VKFence& VKResourceManager::CommitFence() {
- const auto StepFences = [&](bool gpu_wait, bool owner_wait) -> VKFence* {
- const auto Tick = [=](auto& fence) { return fence->Tick(gpu_wait, owner_wait); };
- const auto hinted = fences.begin() + fences_iterator;
-
- auto it = std::find_if(hinted, fences.end(), Tick);
- if (it == fences.end()) {
- it = std::find_if(fences.begin(), hinted, Tick);
- if (it == hinted) {
- return nullptr;
- }
- }
- fences_iterator = std::distance(fences.begin(), it) + 1;
- if (fences_iterator >= fences.size())
- fences_iterator = 0;
-
- auto& fence = *it;
- fence->Commit();
- return fence.get();
- };
-
- VKFence* found_fence = StepFences(false, false);
- if (!found_fence) {
- // Try again, this time waiting.
- found_fence = StepFences(true, false);
-
- if (!found_fence) {
- // Allocate new fences and try again.
- LOG_INFO(Render_Vulkan, "Allocating new fences {} -> {}", fences.size(),
- fences.size() + FENCES_GROW_STEP);
-
- GrowFences(FENCES_GROW_STEP);
- found_fence = StepFences(true, false);
- ASSERT(found_fence != nullptr);
- }
- }
- return *found_fence;
-}
-
-VkCommandBuffer VKResourceManager::CommitCommandBuffer(VKFence& fence) {
- return command_buffer_pool->Commit(fence);
-}
-
-void VKResourceManager::GrowFences(std::size_t new_fences_count) {
- const std::size_t previous_size = fences.size();
- fences.resize(previous_size + new_fences_count);
-
- std::generate(fences.begin() + previous_size, fences.end(),
- [this] { return std::make_unique<VKFence>(device); });
-}
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_manager.h b/src/video_core/renderer_vulkan/vk_resource_manager.h
deleted file mode 100644
index f683d2276..000000000
--- a/src/video_core/renderer_vulkan/vk_resource_manager.h
+++ /dev/null
@@ -1,196 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <cstddef>
-#include <memory>
-#include <vector>
-#include "video_core/renderer_vulkan/wrapper.h"
-
-namespace Vulkan {
-
-class VKDevice;
-class VKFence;
-class VKResourceManager;
-
-class CommandBufferPool;
-
-/// Interface for a Vulkan resource
-class VKResource {
-public:
- explicit VKResource();
- virtual ~VKResource();
-
- /**
- * Signals the object that an owning fence has been signaled.
- * @param signaling_fence Fence that signals its usage end.
- */
- virtual void OnFenceRemoval(VKFence* signaling_fence) = 0;
-};
-
-/**
- * Fences take ownership of objects, protecting them from GPU-side or driver-side concurrent access.
- * They must be commited from the resource manager. Their usage flow is: commit the fence from the
- * resource manager, protect resources with it and use them, send the fence to an execution queue
- * and Wait for it if needed and then call Release. Used resources will automatically be signaled
- * when they are free to be reused.
- * @brief Protects resources for concurrent usage and signals its release.
- */
-class VKFence {
- friend class VKResourceManager;
-
-public:
- explicit VKFence(const VKDevice& device);
- ~VKFence();
-
- /**
- * Waits for the fence to be signaled.
- * @warning You must have ownership of the fence and it has to be previously sent to a queue to
- * call this function.
- */
- void Wait();
-
- /**
- * Releases ownership of the fence. Pass after it has been sent to an execution queue.
- * Unmanaged usage of the fence after the call will result in undefined behavior because it may
- * be being used for something else.
- */
- void Release();
-
- /// Protects a resource with this fence.
- void Protect(VKResource* resource);
-
- /// Removes protection for a resource.
- void Unprotect(VKResource* resource);
-
- /// Redirects one protected resource to a new address.
- void RedirectProtection(VKResource* old_resource, VKResource* new_resource) noexcept;
-
- /// Retreives the fence.
- operator VkFence() const {
- return *handle;
- }
-
-private:
- /// Take ownership of the fence.
- void Commit();
-
- /**
- * Updates the fence status.
- * @warning Waiting for the owner might soft lock the execution.
- * @param gpu_wait Wait for the fence to be signaled by the driver.
- * @param owner_wait Wait for the owner to signal its freedom.
- * @returns True if the fence is free. Waiting for gpu and owner will always return true.
- */
- bool Tick(bool gpu_wait, bool owner_wait);
-
- const VKDevice& device; ///< Device handler
- vk::Fence handle; ///< Vulkan fence
- std::vector<VKResource*> protected_resources; ///< List of resources protected by this fence
- bool is_owned = false; ///< The fence has been commited but not released yet.
- bool is_used = false; ///< The fence has been commited but it has not been checked to be free.
-};
-
-/**
- * A fence watch is used to keep track of the usage of a fence and protect a resource or set of
- * resources without having to inherit VKResource from their handlers.
- */
-class VKFenceWatch final : public VKResource {
-public:
- explicit VKFenceWatch();
- VKFenceWatch(VKFence& initial_fence);
- VKFenceWatch(VKFenceWatch&&) noexcept;
- VKFenceWatch(const VKFenceWatch&) = delete;
- ~VKFenceWatch() override;
-
- VKFenceWatch& operator=(VKFenceWatch&&) noexcept;
-
- /// Waits for the fence to be released.
- void Wait();
-
- /**
- * Waits for a previous fence and watches a new one.
- * @param new_fence New fence to wait to.
- */
- void Watch(VKFence& new_fence);
-
- /**
- * Checks if it's currently being watched and starts watching it if it's available.
- * @returns True if a watch has started, false if it's being watched.
- */
- bool TryWatch(VKFence& new_fence);
-
- void OnFenceRemoval(VKFence* signaling_fence) override;
-
- /**
- * Do not use it paired with Watch. Use TryWatch instead.
- * Returns true when the watch is free.
- */
- bool IsUsed() const {
- return fence != nullptr;
- }
-
-private:
- VKFence* fence{}; ///< Fence watching this resource. nullptr when the watch is free.
-};
-
-/**
- * Handles a pool of resources protected by fences. Manages resource overflow allocating more
- * resources.
- */
-class VKFencedPool {
-public:
- explicit VKFencedPool(std::size_t grow_step);
- virtual ~VKFencedPool();
-
-protected:
- /**
- * Commits a free resource and protects it with a fence. It may allocate new resources.
- * @param fence Fence that protects the commited resource.
- * @returns Index of the resource commited.
- */
- std::size_t CommitResource(VKFence& fence);
-
- /// Called when a chunk of resources have to be allocated.
- virtual void Allocate(std::size_t begin, std::size_t end) = 0;
-
-private:
- /// Manages pool overflow allocating new resources.
- std::size_t ManageOverflow();
-
- /// Allocates a new page of resources.
- void Grow();
-
- std::size_t grow_step = 0; ///< Number of new resources created after an overflow
- std::size_t free_iterator = 0; ///< Hint to where the next free resources is likely to be found
- std::vector<std::unique_ptr<VKFenceWatch>> watches; ///< Set of watched resources
-};
-
-/**
- * The resource manager handles all resources that can be protected with a fence avoiding
- * driver-side or GPU-side concurrent usage. Usage is documented in VKFence.
- */
-class VKResourceManager final {
-public:
- explicit VKResourceManager(const VKDevice& device);
- ~VKResourceManager();
-
- /// Commits a fence. It has to be sent to a queue and released.
- VKFence& CommitFence();
-
- /// Commits an unused command buffer and protects it with a fence.
- VkCommandBuffer CommitCommandBuffer(VKFence& fence);
-
-private:
- /// Allocates new fences.
- void GrowFences(std::size_t new_fences_count);
-
- const VKDevice& device; ///< Device handler.
- std::size_t fences_iterator = 0; ///< Index where a free fence is likely to be found.
- std::vector<std::unique_ptr<VKFence>> fences; ///< Pool of fences.
- std::unique_ptr<CommandBufferPool> command_buffer_pool; ///< Pool of command buffers.
-};
-
-} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.cpp b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
new file mode 100644
index 000000000..ee274ac59
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.cpp
@@ -0,0 +1,63 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <optional>
+
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
+#include "video_core/renderer_vulkan/vk_resource_pool.h"
+
+namespace Vulkan {
+
+ResourcePool::ResourcePool(MasterSemaphore& master_semaphore_, size_t grow_step_)
+ : master_semaphore{master_semaphore_}, grow_step{grow_step_} {}
+
+ResourcePool::~ResourcePool() = default;
+
+size_t ResourcePool::CommitResource() {
+ // Refresh semaphore to query updated results
+ master_semaphore.Refresh();
+
+ const auto search = [this](size_t begin, size_t end) -> std::optional<size_t> {
+ for (size_t iterator = begin; iterator < end; ++iterator) {
+ if (master_semaphore.IsFree(ticks[iterator])) {
+ ticks[iterator] = master_semaphore.CurrentTick();
+ return iterator;
+ }
+ }
+ return {};
+ };
+ // Try to find a free resource from the hinted position to the end.
+ auto found = search(free_iterator, ticks.size());
+ if (!found) {
+ // Search from beginning to the hinted position.
+ found = search(0, free_iterator);
+ if (!found) {
+ // Both searches failed, the pool is full; handle it.
+ const size_t free_resource = ManageOverflow();
+
+ ticks[free_resource] = master_semaphore.CurrentTick();
+ found = free_resource;
+ }
+ }
+ // Free iterator is hinted to the resource after the one that's been commited.
+ free_iterator = (*found + 1) % ticks.size();
+ return *found;
+}
+
+size_t ResourcePool::ManageOverflow() {
+ const size_t old_capacity = ticks.size();
+ Grow();
+
+ // The last entry is guaranted to be free, since it's the first element of the freshly
+ // allocated resources.
+ return old_capacity;
+}
+
+void ResourcePool::Grow() {
+ const size_t old_capacity = ticks.size();
+ ticks.resize(old_capacity + grow_step);
+ Allocate(old_capacity, old_capacity + grow_step);
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_resource_pool.h b/src/video_core/renderer_vulkan/vk_resource_pool.h
new file mode 100644
index 000000000..a018c7ec2
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_resource_pool.h
@@ -0,0 +1,43 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace Vulkan {
+
+class MasterSemaphore;
+
+/**
+ * Handles a pool of resources protected by fences. Manages resource overflow allocating more
+ * resources.
+ */
+class ResourcePool {
+public:
+ explicit ResourcePool(MasterSemaphore& master_semaphore, size_t grow_step);
+ virtual ~ResourcePool();
+
+protected:
+ size_t CommitResource();
+
+ /// Called when a chunk of resources have to be allocated.
+ virtual void Allocate(size_t begin, size_t end) = 0;
+
+private:
+ /// Manages pool overflow allocating new resources.
+ size_t ManageOverflow();
+
+ /// Allocates a new page of resources.
+ void Grow();
+
+ MasterSemaphore& master_semaphore;
+ size_t grow_step = 0; ///< Number of new resources created after an overflow
+ size_t free_iterator = 0; ///< Hint to where the next free resources is likely to be found
+ std::vector<u64> ticks; ///< Ticks for each resource
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index dbbd0961a..1a483dc71 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -10,9 +10,10 @@
#include "common/microprofile.h"
#include "common/thread.h"
+#include "video_core/renderer_vulkan/vk_command_pool.h"
#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/wrapper.h"
@@ -35,10 +36,10 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
last = nullptr;
}
-VKScheduler::VKScheduler(const VKDevice& device, VKResourceManager& resource_manager,
- StateTracker& state_tracker)
- : device{device}, resource_manager{resource_manager}, state_tracker{state_tracker},
- next_fence{&resource_manager.CommitFence()} {
+VKScheduler::VKScheduler(const VKDevice& device_, StateTracker& state_tracker_)
+ : device{device_}, state_tracker{state_tracker_},
+ master_semaphore{std::make_unique<MasterSemaphore>(device)},
+ command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} {
AcquireNewChunk();
AllocateNewContext();
worker_thread = std::thread(&VKScheduler::WorkerThread, this);
@@ -50,20 +51,27 @@ VKScheduler::~VKScheduler() {
worker_thread.join();
}
-void VKScheduler::Flush(bool release_fence, VkSemaphore semaphore) {
+u64 VKScheduler::CurrentTick() const noexcept {
+ return master_semaphore->CurrentTick();
+}
+
+bool VKScheduler::IsFree(u64 tick) const noexcept {
+ return master_semaphore->IsFree(tick);
+}
+
+void VKScheduler::Wait(u64 tick) {
+ master_semaphore->Wait(tick);
+}
+
+void VKScheduler::Flush(VkSemaphore semaphore) {
SubmitExecution(semaphore);
- if (release_fence) {
- current_fence->Release();
- }
AllocateNewContext();
}
-void VKScheduler::Finish(bool release_fence, VkSemaphore semaphore) {
+void VKScheduler::Finish(VkSemaphore semaphore) {
+ const u64 presubmit_tick = CurrentTick();
SubmitExecution(semaphore);
- current_fence->Wait();
- if (release_fence) {
- current_fence->Release();
- }
+ Wait(presubmit_tick);
AllocateNewContext();
}
@@ -160,18 +168,38 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
current_cmdbuf.End();
+ const VkSemaphore timeline_semaphore = master_semaphore->Handle();
+ const u32 num_signal_semaphores = semaphore ? 2U : 1U;
+
+ const u64 signal_value = master_semaphore->CurrentTick();
+ const u64 wait_value = signal_value - 1;
+ const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+
+ master_semaphore->NextTick();
+
+ const std::array signal_values{signal_value, u64(0)};
+ const std::array signal_semaphores{timeline_semaphore, semaphore};
+
+ const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
+ .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
+ .pNext = nullptr,
+ .waitSemaphoreValueCount = 1,
+ .pWaitSemaphoreValues = &wait_value,
+ .signalSemaphoreValueCount = num_signal_semaphores,
+ .pSignalSemaphoreValues = signal_values.data(),
+ };
const VkSubmitInfo submit_info{
.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
- .pNext = nullptr,
- .waitSemaphoreCount = 0,
- .pWaitSemaphores = nullptr,
- .pWaitDstStageMask = nullptr,
+ .pNext = &timeline_si,
+ .waitSemaphoreCount = 1,
+ .pWaitSemaphores = &timeline_semaphore,
+ .pWaitDstStageMask = &wait_stage_mask,
.commandBufferCount = 1,
.pCommandBuffers = current_cmdbuf.address(),
- .signalSemaphoreCount = semaphore ? 1U : 0U,
- .pSignalSemaphores = &semaphore,
+ .signalSemaphoreCount = num_signal_semaphores,
+ .pSignalSemaphores = signal_semaphores.data(),
};
- switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info, *current_fence)) {
+ switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
case VK_SUCCESS:
break;
case VK_ERROR_DEVICE_LOST:
@@ -183,14 +211,9 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
}
void VKScheduler::AllocateNewContext() {
- ++ticks;
-
std::unique_lock lock{mutex};
- current_fence = next_fence;
- next_fence = &resource_manager.CommitFence();
- current_cmdbuf = vk::CommandBuffer(resource_manager.CommitCommandBuffer(*current_fence),
- device.GetDispatchLoader());
+ current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
current_cmdbuf.Begin({
.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
.pNext = nullptr,
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 970a65566..7be8a19f0 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -16,42 +16,33 @@
namespace Vulkan {
+class CommandPool;
+class MasterSemaphore;
class StateTracker;
class VKDevice;
-class VKFence;
class VKQueryCache;
-class VKResourceManager;
-
-class VKFenceView {
-public:
- VKFenceView() = default;
- VKFenceView(VKFence* const& fence) : fence{fence} {}
-
- VKFence* operator->() const noexcept {
- return fence;
- }
-
- operator VKFence&() const noexcept {
- return *fence;
- }
-
-private:
- VKFence* const& fence;
-};
/// The scheduler abstracts command buffer and fence management with an interface that's able to do
/// OpenGL-like operations on Vulkan command buffers.
class VKScheduler {
public:
- explicit VKScheduler(const VKDevice& device, VKResourceManager& resource_manager,
- StateTracker& state_tracker);
+ explicit VKScheduler(const VKDevice& device, StateTracker& state_tracker);
~VKScheduler();
+ /// Returns the current command buffer tick.
+ [[nodiscard]] u64 CurrentTick() const noexcept;
+
+ /// Returns true when a tick has been triggered by the GPU.
+ [[nodiscard]] bool IsFree(u64 tick) const noexcept;
+
+ /// Waits for the given tick to trigger on the GPU.
+ void Wait(u64 tick);
+
/// Sends the current execution context to the GPU.
- void Flush(bool release_fence = true, VkSemaphore semaphore = nullptr);
+ void Flush(VkSemaphore semaphore = nullptr);
/// Sends the current execution context to the GPU and waits for it to complete.
- void Finish(bool release_fence = true, VkSemaphore semaphore = nullptr);
+ void Finish(VkSemaphore semaphore = nullptr);
/// Waits for the worker thread to finish executing everything. After this function returns it's
/// safe to touch worker resources.
@@ -86,14 +77,9 @@ public:
(void)chunk->Record(command);
}
- /// Gets a reference to the current fence.
- VKFenceView GetFence() const {
- return current_fence;
- }
-
- /// Returns the current command buffer tick.
- u64 Ticks() const {
- return ticks;
+ /// Returns the master timeline semaphore.
+ [[nodiscard]] MasterSemaphore& GetMasterSemaphore() const noexcept {
+ return *master_semaphore;
}
private:
@@ -171,6 +157,13 @@ private:
std::array<u8, 0x8000> data{};
};
+ struct State {
+ VkRenderPass renderpass = nullptr;
+ VkFramebuffer framebuffer = nullptr;
+ VkExtent2D render_area = {0, 0};
+ VkPipeline graphics_pipeline = nullptr;
+ };
+
void WorkerThread();
void SubmitExecution(VkSemaphore semaphore);
@@ -186,30 +179,23 @@ private:
void AcquireNewChunk();
const VKDevice& device;
- VKResourceManager& resource_manager;
StateTracker& state_tracker;
+ std::unique_ptr<MasterSemaphore> master_semaphore;
+ std::unique_ptr<CommandPool> command_pool;
+
VKQueryCache* query_cache = nullptr;
vk::CommandBuffer current_cmdbuf;
- VKFence* current_fence = nullptr;
- VKFence* next_fence = nullptr;
-
- struct State {
- VkRenderPass renderpass = nullptr;
- VkFramebuffer framebuffer = nullptr;
- VkExtent2D render_area = {0, 0};
- VkPipeline graphics_pipeline = nullptr;
- } state;
std::unique_ptr<CommandChunk> chunk;
std::thread worker_thread;
+ State state;
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_queue;
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
std::mutex mutex;
std::condition_variable cv;
- std::atomic<u64> ticks = 0;
bool quit = false;
};
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 5eca0ab91..2fd3b7f39 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -10,36 +10,18 @@
#include "common/bit_util.h"
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
-VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence,
- u64 last_epoch)
- : buffer{std::move(buffer)}, watch{fence}, last_epoch{last_epoch} {}
+VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer_)
+ : buffer{std::move(buffer_)} {}
-VKStagingBufferPool::StagingBuffer::StagingBuffer(StagingBuffer&& rhs) noexcept {
- buffer = std::move(rhs.buffer);
- watch = std::move(rhs.watch);
- last_epoch = rhs.last_epoch;
-}
-
-VKStagingBufferPool::StagingBuffer::~StagingBuffer() = default;
-
-VKStagingBufferPool::StagingBuffer& VKStagingBufferPool::StagingBuffer::operator=(
- StagingBuffer&& rhs) noexcept {
- buffer = std::move(rhs.buffer);
- watch = std::move(rhs.watch);
- last_epoch = rhs.last_epoch;
- return *this;
-}
-
-VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager,
- VKScheduler& scheduler)
- : device{device}, memory_manager{memory_manager}, scheduler{scheduler} {}
+VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device_, VKMemoryManager& memory_manager_,
+ VKScheduler& scheduler_)
+ : device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_} {}
VKStagingBufferPool::~VKStagingBufferPool() = default;
@@ -51,7 +33,6 @@ VKBuffer& VKStagingBufferPool::GetUnusedBuffer(std::size_t size, bool host_visib
}
void VKStagingBufferPool::TickFrame() {
- ++epoch;
current_delete_level = (current_delete_level + 1) % NumLevels;
ReleaseCache(true);
@@ -59,11 +40,12 @@ void VKStagingBufferPool::TickFrame() {
}
VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_visible) {
- for (auto& entry : GetCache(host_visible)[Common::Log2Ceil64(size)].entries) {
- if (entry.watch.TryWatch(scheduler.GetFence())) {
- entry.last_epoch = epoch;
- return &*entry.buffer;
+ for (StagingBuffer& entry : GetCache(host_visible)[Common::Log2Ceil64(size)].entries) {
+ if (!scheduler.IsFree(entry.tick)) {
+ continue;
}
+ entry.tick = scheduler.CurrentTick();
+ return &*entry.buffer;
}
return nullptr;
}
@@ -86,8 +68,10 @@ VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_v
});
buffer->commit = memory_manager.Commit(buffer->handle, host_visible);
- auto& entries = GetCache(host_visible)[log2].entries;
- return *entries.emplace_back(std::move(buffer), scheduler.GetFence(), epoch).buffer;
+ std::vector<StagingBuffer>& entries = GetCache(host_visible)[log2].entries;
+ StagingBuffer& entry = entries.emplace_back(std::move(buffer));
+ entry.tick = scheduler.CurrentTick();
+ return *entry.buffer;
}
VKStagingBufferPool::StagingBuffersCache& VKStagingBufferPool::GetCache(bool host_visible) {
@@ -109,9 +93,8 @@ u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t lo
auto& entries = staging.entries;
const std::size_t old_size = entries.size();
- const auto is_deleteable = [this](const auto& entry) {
- static constexpr u64 epochs_to_destroy = 180;
- return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed();
+ const auto is_deleteable = [this](const StagingBuffer& entry) {
+ return scheduler.IsFree(entry.tick);
};
const std::size_t begin_offset = staging.delete_index;
const std::size_t end_offset = std::min(begin_offset + deletions_per_tick, old_size);
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 3c4901437..2dd5049ac 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -10,13 +10,11 @@
#include "common/common_types.h"
#include "video_core/renderer_vulkan/vk_memory_manager.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/wrapper.h"
namespace Vulkan {
class VKDevice;
-class VKFenceWatch;
class VKScheduler;
struct VKBuffer final {
@@ -36,16 +34,10 @@ public:
private:
struct StagingBuffer final {
- explicit StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence, u64 last_epoch);
- StagingBuffer(StagingBuffer&& rhs) noexcept;
- StagingBuffer(const StagingBuffer&) = delete;
- ~StagingBuffer();
-
- StagingBuffer& operator=(StagingBuffer&& rhs) noexcept;
+ explicit StagingBuffer(std::unique_ptr<VKBuffer> buffer);
std::unique_ptr<VKBuffer> buffer;
- VKFenceWatch watch;
- u64 last_epoch = 0;
+ u64 tick = 0;
};
struct StagingBuffers final {
@@ -73,8 +65,6 @@ private:
StagingBuffersCache host_staging_buffers;
StagingBuffersCache device_staging_buffers;
- u64 epoch = 0;
-
std::size_t current_delete_level = 0;
};
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index 9151d9fb1..5d2c4a796 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -42,7 +42,6 @@ Flags MakeInvalidationFlags() {
flags[DepthWriteEnable] = true;
flags[DepthCompareOp] = true;
flags[FrontFace] = true;
- flags[PrimitiveTopology] = true;
flags[StencilOp] = true;
flags[StencilTestEnable] = true;
return flags;
@@ -112,10 +111,6 @@ void SetupDirtyFrontFace(Tables& tables) {
table[OFF(screen_y_control)] = FrontFace;
}
-void SetupDirtyPrimitiveTopology(Tables& tables) {
- tables[0][OFF(draw.topology)] = PrimitiveTopology;
-}
-
void SetupDirtyStencilOp(Tables& tables) {
auto& table = tables[0];
table[OFF(stencil_front_op_fail)] = StencilOp;
@@ -137,12 +132,9 @@ void SetupDirtyStencilTestEnable(Tables& tables) {
} // Anonymous namespace
-StateTracker::StateTracker(Core::System& system)
- : system{system}, invalidation_flags{MakeInvalidationFlags()} {}
-
-void StateTracker::Initialize() {
- auto& dirty = system.GPU().Maxwell3D().dirty;
- auto& tables = dirty.tables;
+StateTracker::StateTracker(Tegra::GPU& gpu)
+ : flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
+ auto& tables = gpu.Maxwell3D().dirty.tables;
SetupDirtyRenderTargets(tables);
SetupDirtyViewports(tables);
SetupDirtyScissors(tables);
@@ -156,13 +148,8 @@ void StateTracker::Initialize() {
SetupDirtyDepthWriteEnable(tables);
SetupDirtyDepthCompareOp(tables);
SetupDirtyFrontFace(tables);
- SetupDirtyPrimitiveTopology(tables);
SetupDirtyStencilOp(tables);
SetupDirtyStencilTestEnable(tables);
}
-void StateTracker::InvalidateCommandBufferState() {
- system.GPU().Maxwell3D().dirty.flags |= invalidation_flags;
-}
-
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 54ca0d6c6..1de789e57 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -32,7 +32,6 @@ enum : u8 {
DepthWriteEnable,
DepthCompareOp,
FrontFace,
- PrimitiveTopology,
StencilOp,
StencilTestEnable,
@@ -43,12 +42,15 @@ static_assert(Last <= std::numeric_limits<u8>::max());
} // namespace Dirty
class StateTracker {
-public:
- explicit StateTracker(Core::System& system);
+ using Maxwell = Tegra::Engines::Maxwell3D::Regs;
- void Initialize();
+public:
+ explicit StateTracker(Tegra::GPU& gpu);
- void InvalidateCommandBufferState();
+ void InvalidateCommandBufferState() {
+ flags |= invalidation_flags;
+ current_topology = INVALID_TOPOLOGY;
+ }
bool TouchViewports() {
return Exchange(Dirty::Viewports, false);
@@ -102,10 +104,6 @@ public:
return Exchange(Dirty::FrontFace, false);
}
- bool TouchPrimitiveTopology() {
- return Exchange(Dirty::PrimitiveTopology, false);
- }
-
bool TouchStencilOp() {
return Exchange(Dirty::StencilOp, false);
}
@@ -114,16 +112,24 @@ public:
return Exchange(Dirty::StencilTestEnable, false);
}
+ bool ChangePrimitiveTopology(Maxwell::PrimitiveTopology new_topology) {
+ const bool has_changed = current_topology != new_topology;
+ current_topology = new_topology;
+ return has_changed;
+ }
+
private:
+ static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u);
+
bool Exchange(std::size_t id, bool new_value) const noexcept {
- auto& flags = system.GPU().Maxwell3D().dirty.flags;
const bool is_dirty = flags[id];
flags[id] = new_value;
return is_dirty;
}
- Core::System& system;
+ Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
+ Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
};
} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
index a5526a3f5..1b59612b9 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp
@@ -11,7 +11,6 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
#include "video_core/renderer_vulkan/wrapper.h"
@@ -57,9 +56,9 @@ u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties,
} // Anonymous namespace
-VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler,
+VKStreamBuffer::VKStreamBuffer(const VKDevice& device_, VKScheduler& scheduler_,
VkBufferUsageFlags usage)
- : device{device}, scheduler{scheduler} {
+ : device{device_}, scheduler{scheduler_} {
CreateBuffers(usage);
ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE);
ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE);
@@ -111,7 +110,7 @@ void VKStreamBuffer::Unmap(u64 size) {
}
auto& watch = current_watches[current_watch_cursor++];
watch.upper_bound = offset;
- watch.fence.Watch(scheduler.GetFence());
+ watch.tick = scheduler.CurrentTick();
}
void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) {
@@ -121,7 +120,8 @@ void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) {
// Substract from the preferred heap size some bytes to avoid getting out of memory.
const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size;
- const VkDeviceSize allocable_size = heap_size - 9 * 1024 * 1024;
+ // As per DXVK's example, using `heap_size / 2`
+ const VkDeviceSize allocable_size = heap_size / 2;
buffer = device.GetLogical().CreateBuffer({
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
@@ -157,7 +157,7 @@ void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) {
while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) {
auto& watch = previous_watches[wait_cursor];
wait_bound = watch.upper_bound;
- watch.fence.Wait();
+ scheduler.Wait(watch.tick);
++wait_cursor;
}
}
diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h
index 689f0d276..5e15ad78f 100644
--- a/src/video_core/renderer_vulkan/vk_stream_buffer.h
+++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h
@@ -14,7 +14,6 @@
namespace Vulkan {
class VKDevice;
-class VKFence;
class VKFenceWatch;
class VKScheduler;
@@ -44,8 +43,8 @@ public:
}
private:
- struct Watch final {
- VKFenceWatch fence;
+ struct Watch {
+ u64 tick{};
u64 upper_bound{};
};
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp
index 6bfd2abae..9636a7c65 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.cpp
+++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp
@@ -12,7 +12,7 @@
#include "core/core.h"
#include "core/frontend/framebuffer_layout.h"
#include "video_core/renderer_vulkan/vk_device.h"
-#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_swapchain.h"
#include "video_core/renderer_vulkan/wrapper.h"
@@ -56,8 +56,8 @@ VkExtent2D ChooseSwapExtent(const VkSurfaceCapabilitiesKHR& capabilities, u32 wi
} // Anonymous namespace
-VKSwapchain::VKSwapchain(VkSurfaceKHR surface, const VKDevice& device)
- : surface{surface}, device{device} {}
+VKSwapchain::VKSwapchain(VkSurfaceKHR surface_, const VKDevice& device_, VKScheduler& scheduler_)
+ : surface{surface_}, device{device_}, scheduler{scheduler_} {}
VKSwapchain::~VKSwapchain() = default;
@@ -75,21 +75,18 @@ void VKSwapchain::Create(u32 width, u32 height, bool srgb) {
CreateSemaphores();
CreateImageViews();
- fences.resize(image_count, nullptr);
+ resource_ticks.clear();
+ resource_ticks.resize(image_count);
}
void VKSwapchain::AcquireNextImage() {
device.GetLogical().AcquireNextImageKHR(*swapchain, std::numeric_limits<u64>::max(),
*present_semaphores[frame_index], {}, &image_index);
- if (auto& fence = fences[image_index]; fence) {
- fence->Wait();
- fence->Release();
- fence = nullptr;
- }
+ scheduler.Wait(resource_ticks[image_index]);
}
-bool VKSwapchain::Present(VkSemaphore render_semaphore, VKFence& fence) {
+bool VKSwapchain::Present(VkSemaphore render_semaphore) {
const VkSemaphore present_semaphore{*present_semaphores[frame_index]};
const std::array<VkSemaphore, 2> semaphores{present_semaphore, render_semaphore};
const auto present_queue{device.GetPresentQueue()};
@@ -123,8 +120,7 @@ bool VKSwapchain::Present(VkSemaphore render_semaphore, VKFence& fence) {
break;
}
- ASSERT(fences[image_index] == nullptr);
- fences[image_index] = &fence;
+ resource_ticks[image_index] = scheduler.CurrentTick();
frame_index = (frame_index + 1) % static_cast<u32>(image_count);
return recreated;
}
diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h
index a35d61345..6b39befdf 100644
--- a/src/video_core/renderer_vulkan/vk_swapchain.h
+++ b/src/video_core/renderer_vulkan/vk_swapchain.h
@@ -16,11 +16,11 @@ struct FramebufferLayout;
namespace Vulkan {
class VKDevice;
-class VKFence;
+class VKScheduler;
class VKSwapchain {
public:
- explicit VKSwapchain(VkSurfaceKHR surface, const VKDevice& device);
+ explicit VKSwapchain(VkSurfaceKHR surface, const VKDevice& device, VKScheduler& scheduler);
~VKSwapchain();
/// Creates (or recreates) the swapchain with a given size.
@@ -31,7 +31,7 @@ public:
/// Presents the rendered image to the swapchain. Returns true when the swapchains had to be
/// recreated. Takes responsability for the ownership of fence.
- bool Present(VkSemaphore render_semaphore, VKFence& fence);
+ bool Present(VkSemaphore render_semaphore);
/// Returns true when the framebuffer layout has changed.
bool HasFramebufferChanged(const Layout::FramebufferLayout& framebuffer) const;
@@ -74,6 +74,7 @@ private:
const VkSurfaceKHR surface;
const VKDevice& device;
+ VKScheduler& scheduler;
vk::SwapchainKHR swapchain;
@@ -81,7 +82,7 @@ private:
std::vector<VkImage> images;
std::vector<vk::ImageView> image_views;
std::vector<vk::Framebuffer> framebuffers;
- std::vector<VKFence*> fences;
+ std::vector<u64> resource_ticks;
std::vector<vk::Semaphore> present_semaphores;
u32 image_index{};
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 2c6f54101..f2c8f2ae1 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -188,12 +188,10 @@ u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, Tegra::Texture::Swizzl
} // Anonymous namespace
-CachedSurface::CachedSurface(Core::System& system, const VKDevice& device,
- VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
+CachedSurface::CachedSurface(const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
GPUVAddr gpu_addr, const SurfaceParams& params)
- : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, system{system},
- device{device}, resource_manager{resource_manager},
+ : SurfaceBase<View>{gpu_addr, params, device.IsOptimalAstcSupported()}, device{device},
memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {
if (params.IsBuffer()) {
buffer = CreateBuffer(device, params, host_memory_size);
@@ -490,19 +488,20 @@ VkImageView CachedSurfaceView::GetAttachment() {
return *render_target;
}
-VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const VKDevice& device, VKResourceManager& resource_manager,
- VKMemoryManager& memory_manager, VKScheduler& scheduler,
- VKStagingBufferPool& staging_pool)
- : TextureCache(system, rasterizer, device.IsOptimalAstcSupported()), device{device},
- resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler},
- staging_pool{staging_pool} {}
+VKTextureCache::VKTextureCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::Engines::Maxwell3D& maxwell3d,
+ Tegra::MemoryManager& gpu_memory, const VKDevice& device_,
+ VKMemoryManager& memory_manager_, VKScheduler& scheduler_,
+ VKStagingBufferPool& staging_pool_)
+ : TextureCache(rasterizer, maxwell3d, gpu_memory, device_.IsOptimalAstcSupported()),
+ device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{
+ staging_pool_} {}
VKTextureCache::~VKTextureCache() = default;
Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
- return std::make_shared<CachedSurface>(system, device, resource_manager, memory_manager,
- scheduler, staging_pool, gpu_addr, params);
+ return std::make_shared<CachedSurface>(device, memory_manager, scheduler, staging_pool,
+ gpu_addr, params);
}
void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface,
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h
index 807e26c8a..39202feba 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.h
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.h
@@ -15,10 +15,6 @@
#include "video_core/texture_cache/surface_base.h"
#include "video_core/texture_cache/texture_cache.h"
-namespace Core {
-class System;
-}
-
namespace VideoCore {
class RasterizerInterface;
}
@@ -27,7 +23,6 @@ namespace Vulkan {
class RasterizerVulkan;
class VKDevice;
-class VKResourceManager;
class VKScheduler;
class VKStagingBufferPool;
@@ -45,8 +40,7 @@ class CachedSurface final : public VideoCommon::SurfaceBase<View> {
friend CachedSurfaceView;
public:
- explicit CachedSurface(Core::System& system, const VKDevice& device,
- VKResourceManager& resource_manager, VKMemoryManager& memory_manager,
+ explicit CachedSurface(const VKDevice& device, VKMemoryManager& memory_manager,
VKScheduler& scheduler, VKStagingBufferPool& staging_pool,
GPUVAddr gpu_addr, const SurfaceParams& params);
~CachedSurface();
@@ -101,9 +95,7 @@ private:
VkImageSubresourceRange GetImageSubresourceRange() const;
- Core::System& system;
const VKDevice& device;
- VKResourceManager& resource_manager;
VKMemoryManager& memory_manager;
VKScheduler& scheduler;
VKStagingBufferPool& staging_pool;
@@ -201,10 +193,10 @@ private:
class VKTextureCache final : public TextureCacheBase {
public:
- explicit VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const VKDevice& device, VKResourceManager& resource_manager,
- VKMemoryManager& memory_manager, VKScheduler& scheduler,
- VKStagingBufferPool& staging_pool);
+ explicit VKTextureCache(VideoCore::RasterizerInterface& rasterizer,
+ Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory,
+ const VKDevice& device, VKMemoryManager& memory_manager,
+ VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
~VKTextureCache();
private:
@@ -219,7 +211,6 @@ private:
void BufferCopy(Surface& src_surface, Surface& dst_surface) override;
const VKDevice& device;
- VKResourceManager& resource_manager;
VKMemoryManager& memory_manager;
VKScheduler& scheduler;
VKStagingBufferPool& staging_pool;
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
index 56055af1b..c034558a3 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -6,6 +6,7 @@
#include <exception>
#include <memory>
#include <optional>
+#include <string_view>
#include <utility>
#include <vector>
@@ -18,21 +19,42 @@ namespace Vulkan::vk {
namespace {
+template <typename Func>
+void SortPhysicalDevices(std::vector<VkPhysicalDevice>& devices, const InstanceDispatch& dld,
+ Func&& func) {
+ // Calling GetProperties calls Vulkan more than needed. But they are supposed to be cheap
+ // functions.
+ std::stable_sort(devices.begin(), devices.end(),
+ [&dld, &func](VkPhysicalDevice lhs, VkPhysicalDevice rhs) {
+ return func(vk::PhysicalDevice(lhs, dld).GetProperties(),
+ vk::PhysicalDevice(rhs, dld).GetProperties());
+ });
+}
+
+void SortPhysicalDevicesPerVendor(std::vector<VkPhysicalDevice>& devices,
+ const InstanceDispatch& dld,
+ std::initializer_list<u32> vendor_ids) {
+ for (auto it = vendor_ids.end(); it != vendor_ids.begin();) {
+ --it;
+ SortPhysicalDevices(devices, dld, [id = *it](const auto& lhs, const auto& rhs) {
+ return lhs.vendorID == id && rhs.vendorID != id;
+ });
+ }
+}
+
void SortPhysicalDevices(std::vector<VkPhysicalDevice>& devices, const InstanceDispatch& dld) {
- std::stable_sort(devices.begin(), devices.end(), [&](auto lhs, auto rhs) {
- // This will call Vulkan more than needed, but these calls are cheap.
- const auto lhs_properties = vk::PhysicalDevice(lhs, dld).GetProperties();
- const auto rhs_properties = vk::PhysicalDevice(rhs, dld).GetProperties();
-
- // Prefer discrete GPUs, Nvidia over AMD, AMD over Intel, Intel over the rest.
- const bool preferred =
- (lhs_properties.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU &&
- rhs_properties.deviceType != VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) ||
- (lhs_properties.vendorID == 0x10DE && rhs_properties.vendorID != 0x10DE) ||
- (lhs_properties.vendorID == 0x1002 && rhs_properties.vendorID != 0x1002) ||
- (lhs_properties.vendorID == 0x8086 && rhs_properties.vendorID != 0x8086);
- return !preferred;
+ // Sort by name, this will set a base and make GPUs with higher numbers appear first
+ // (e.g. GTX 1650 will intentionally be listed before a GTX 1080).
+ SortPhysicalDevices(devices, dld, [](const auto& lhs, const auto& rhs) {
+ return std::string_view{lhs.deviceName} > std::string_view{rhs.deviceName};
});
+ // Prefer discrete over non-discrete
+ SortPhysicalDevices(devices, dld, [](const auto& lhs, const auto& rhs) {
+ return lhs.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU &&
+ rhs.deviceType != VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
+ });
+ // Prefer Nvidia over AMD, AMD over Intel, Intel over the rest.
+ SortPhysicalDevicesPerVendor(devices, dld, {0x10DE, 0x1002, 0x8086});
}
template <typename T>
@@ -149,6 +171,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkGetFenceStatus);
X(vkGetImageMemoryRequirements);
X(vkGetQueryPoolResults);
+ X(vkGetSemaphoreCounterValueKHR);
X(vkMapMemory);
X(vkQueueSubmit);
X(vkResetFences);
@@ -157,6 +180,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkUpdateDescriptorSetWithTemplateKHR);
X(vkUpdateDescriptorSets);
X(vkWaitForFences);
+ X(vkWaitSemaphoresKHR);
#undef X
}
@@ -263,6 +287,22 @@ const char* ToString(VkResult result) noexcept {
return "VK_ERROR_INVALID_DEVICE_ADDRESS_EXT";
case VkResult::VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT:
return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT";
+ case VkResult::VK_ERROR_UNKNOWN:
+ return "VK_ERROR_UNKNOWN";
+ case VkResult::VK_ERROR_INCOMPATIBLE_VERSION_KHR:
+ return "VK_ERROR_INCOMPATIBLE_VERSION_KHR";
+ case VkResult::VK_THREAD_IDLE_KHR:
+ return "VK_THREAD_IDLE_KHR";
+ case VkResult::VK_THREAD_DONE_KHR:
+ return "VK_THREAD_DONE_KHR";
+ case VkResult::VK_OPERATION_DEFERRED_KHR:
+ return "VK_OPERATION_DEFERRED_KHR";
+ case VkResult::VK_OPERATION_NOT_DEFERRED_KHR:
+ return "VK_OPERATION_NOT_DEFERRED_KHR";
+ case VkResult::VK_PIPELINE_COMPILE_REQUIRED_EXT:
+ return "VK_PIPELINE_COMPILE_REQUIRED_EXT";
+ case VkResult::VK_RESULT_MAX_ENUM:
+ return "VK_RESULT_MAX_ENUM";
}
return "Unknown";
}
@@ -558,7 +598,10 @@ Semaphore Device::CreateSemaphore() const {
.pNext = nullptr,
.flags = 0,
};
+ return CreateSemaphore(ci);
+}
+Semaphore Device::CreateSemaphore(const VkSemaphoreCreateInfo& ci) const {
VkSemaphore object;
Check(dld->vkCreateSemaphore(handle, &ci, nullptr, &object));
return Semaphore(object, handle, *dld);
@@ -644,7 +687,7 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons
return ShaderModule(object, handle, *dld);
}
-Event Device::CreateNewEvent() const {
+Event Device::CreateEvent() const {
static constexpr VkEventCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
.pNext = nullptr,
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
index 748a94d2f..f64919623 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -267,6 +267,7 @@ struct DeviceDispatch : public InstanceDispatch {
PFN_vkGetFenceStatus vkGetFenceStatus;
PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
PFN_vkGetQueryPoolResults vkGetQueryPoolResults;
+ PFN_vkGetSemaphoreCounterValueKHR vkGetSemaphoreCounterValueKHR;
PFN_vkMapMemory vkMapMemory;
PFN_vkQueueSubmit vkQueueSubmit;
PFN_vkResetFences vkResetFences;
@@ -275,6 +276,7 @@ struct DeviceDispatch : public InstanceDispatch {
PFN_vkUpdateDescriptorSetWithTemplateKHR vkUpdateDescriptorSetWithTemplateKHR;
PFN_vkUpdateDescriptorSets vkUpdateDescriptorSets;
PFN_vkWaitForFences vkWaitForFences;
+ PFN_vkWaitSemaphoresKHR vkWaitSemaphoresKHR;
};
/// Loads instance agnostic function pointers.
@@ -550,7 +552,6 @@ using PipelineLayout = Handle<VkPipelineLayout, VkDevice, DeviceDispatch>;
using QueryPool = Handle<VkQueryPool, VkDevice, DeviceDispatch>;
using RenderPass = Handle<VkRenderPass, VkDevice, DeviceDispatch>;
using Sampler = Handle<VkSampler, VkDevice, DeviceDispatch>;
-using Semaphore = Handle<VkSemaphore, VkDevice, DeviceDispatch>;
using ShaderModule = Handle<VkShaderModule, VkDevice, DeviceDispatch>;
using SurfaceKHR = Handle<VkSurfaceKHR, VkInstance, InstanceDispatch>;
@@ -582,7 +583,8 @@ public:
/// Construct a queue handle.
constexpr Queue(VkQueue queue, const DeviceDispatch& dld) noexcept : queue{queue}, dld{&dld} {}
- VkResult Submit(Span<VkSubmitInfo> submit_infos, VkFence fence) const noexcept {
+ VkResult Submit(Span<VkSubmitInfo> submit_infos,
+ VkFence fence = VK_NULL_HANDLE) const noexcept {
return dld->vkQueueSubmit(queue, submit_infos.size(), submit_infos.data(), fence);
}
@@ -674,6 +676,44 @@ public:
}
};
+class Semaphore : public Handle<VkSemaphore, VkDevice, DeviceDispatch> {
+ using Handle<VkSemaphore, VkDevice, DeviceDispatch>::Handle;
+
+public:
+ [[nodiscard]] u64 GetCounter() const {
+ u64 value;
+ Check(dld->vkGetSemaphoreCounterValueKHR(owner, handle, &value));
+ return value;
+ }
+
+ /**
+ * Waits for a timeline semaphore on the host.
+ *
+ * @param value Value to wait
+ * @param timeout Time in nanoseconds to timeout
+ * @return True on successful wait, false on timeout
+ */
+ bool Wait(u64 value, u64 timeout = std::numeric_limits<u64>::max()) const {
+ const VkSemaphoreWaitInfoKHR wait_info{
+ .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR,
+ .pNext = nullptr,
+ .flags = 0,
+ .semaphoreCount = 1,
+ .pSemaphores = &handle,
+ .pValues = &value,
+ };
+ const VkResult result = dld->vkWaitSemaphoresKHR(owner, &wait_info, timeout);
+ switch (result) {
+ case VK_SUCCESS:
+ return true;
+ case VK_TIMEOUT:
+ return false;
+ default:
+ throw Exception(result);
+ }
+ }
+};
+
class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> {
using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle;
@@ -694,6 +734,8 @@ public:
Semaphore CreateSemaphore() const;
+ Semaphore CreateSemaphore(const VkSemaphoreCreateInfo& ci) const;
+
Fence CreateFence(const VkFenceCreateInfo& ci) const;
DescriptorPool CreateDescriptorPool(const VkDescriptorPoolCreateInfo& ci) const;
@@ -721,7 +763,7 @@ public:
ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;
- Event CreateNewEvent() const;
+ Event CreateEvent() const;
SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h
index cca13bcde..8e5a22ab3 100644
--- a/src/video_core/shader/ast.h
+++ b/src/video_core/shader/ast.h
@@ -199,55 +199,48 @@ public:
}
std::optional<u32> GetGotoLabel() const {
- auto inner = std::get_if<ASTGoto>(&data);
- if (inner) {
+ if (const auto* inner = std::get_if<ASTGoto>(&data)) {
return {inner->label};
}
- return {};
+ return std::nullopt;
}
Expr GetGotoCondition() const {
- auto inner = std::get_if<ASTGoto>(&data);
- if (inner) {
+ if (const auto* inner = std::get_if<ASTGoto>(&data)) {
return inner->condition;
}
return nullptr;
}
void MarkLabelUnused() {
- auto inner = std::get_if<ASTLabel>(&data);
- if (inner) {
+ if (auto* inner = std::get_if<ASTLabel>(&data)) {
inner->unused = true;
}
}
bool IsLabelUnused() const {
- auto inner = std::get_if<ASTLabel>(&data);
- if (inner) {
+ if (const auto* inner = std::get_if<ASTLabel>(&data)) {
return inner->unused;
}
return true;
}
std::optional<u32> GetLabelIndex() const {
- auto inner = std::get_if<ASTLabel>(&data);
- if (inner) {
+ if (const auto* inner = std::get_if<ASTLabel>(&data)) {
return {inner->index};
}
- return {};
+ return std::nullopt;
}
Expr GetIfCondition() const {
- auto inner = std::get_if<ASTIfThen>(&data);
- if (inner) {
+ if (const auto* inner = std::get_if<ASTIfThen>(&data)) {
return inner->condition;
}
return nullptr;
}
void SetGotoCondition(Expr new_condition) {
- auto inner = std::get_if<ASTGoto>(&data);
- if (inner) {
+ if (auto* inner = std::get_if<ASTGoto>(&data)) {
inner->condition = std::move(new_condition);
}
}
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index f815584f7..aabd62c5c 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -73,11 +73,11 @@ void AsyncShaders::KillWorkers() {
worker_threads.clear();
}
-bool AsyncShaders::HasWorkQueued() {
+bool AsyncShaders::HasWorkQueued() const {
return !pending_queue.empty();
}
-bool AsyncShaders::HasCompletedWork() {
+bool AsyncShaders::HasCompletedWork() const {
std::shared_lock lock{completed_mutex};
return !finished_work.empty();
}
@@ -102,7 +102,7 @@ bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {
}
std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
- std::vector<AsyncShaders::Result> results;
+ std::vector<Result> results;
{
std::unique_lock lock{completed_mutex};
results.assign(std::make_move_iterator(finished_work.begin()),
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
index d5ae814d5..7a99e1dc5 100644
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -5,11 +5,21 @@
#pragma once
#include <condition_variable>
-#include <deque>
#include <memory>
#include <shared_mutex>
#include <thread>
-#include "common/bit_field.h"
+
+// This header includes both Vulkan and OpenGL headers, this has to be fixed
+// Unfortunately, including OpenGL will include Windows.h that defines macros that can cause issues.
+// Forcefully include glad early and undefine macros
+#include <glad/glad.h>
+#ifdef CreateEvent
+#undef CreateEvent
+#endif
+#ifdef CreateSemaphore
+#undef CreateSemaphore
+#endif
+
#include "common/common_types.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -17,7 +27,6 @@
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
-#include "video_core/renderer_vulkan/vk_update_descriptor.h"
namespace Core::Frontend {
class EmuWindow;
@@ -70,20 +79,20 @@ public:
void KillWorkers();
/// Check to see if any shaders have actually been compiled
- bool HasCompletedWork();
+ [[nodiscard]] bool HasCompletedWork() const;
/// Deduce if a shader can be build on another thread of MUST be built in sync. We cannot build
/// every shader async as some shaders are only built and executed once. We try to "guess" which
/// shader would be used only once
- bool IsShaderAsync(const Tegra::GPU& gpu) const;
+ [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const;
/// Pulls completed compiled shaders
- std::vector<Result> GetCompletedWork();
+ [[nodiscard]] std::vector<Result> GetCompletedWork();
void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type,
u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset,
- VideoCommon::Shader::CompilerSettings compiler_settings,
- const VideoCommon::Shader::Registry& registry, VAddr cpu_addr);
+ CompilerSettings compiler_settings, const Registry& registry,
+ VAddr cpu_addr);
void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device,
Vulkan::VKScheduler& scheduler,
@@ -97,7 +106,7 @@ private:
void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
/// Check our worker queue to see if we have any work queued already
- bool HasWorkQueued();
+ [[nodiscard]] bool HasWorkQueued() const;
struct WorkerParams {
Backend backend;
@@ -108,8 +117,8 @@ private:
std::vector<u64> code;
std::vector<u64> code_b;
u32 main_offset;
- VideoCommon::Shader::CompilerSettings compiler_settings;
- std::optional<VideoCommon::Shader::Registry> registry;
+ CompilerSettings compiler_settings;
+ std::optional<Registry> registry;
VAddr cpu_address;
// For Vulkan
@@ -125,13 +134,13 @@ private:
};
std::condition_variable cv;
- std::mutex queue_mutex;
- std::shared_mutex completed_mutex;
+ mutable std::mutex queue_mutex;
+ mutable std::shared_mutex completed_mutex;
std::atomic<bool> is_thread_exiting{};
std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
std::vector<std::thread> worker_threads;
std::queue<WorkerParams> pending_queue;
- std::vector<AsyncShaders::Result> finished_work;
+ std::vector<Result> finished_work;
Core::Frontend::EmuWindow& emu_window;
};
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 336397cdb..4c8971615 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -547,13 +547,13 @@ bool TryQuery(CFGRebuildState& state) {
gather_labels(q2.ssy_stack, state.ssy_labels, block);
gather_labels(q2.pbk_stack, state.pbk_labels, block);
if (std::holds_alternative<SingleBranch>(*block.branch)) {
- const auto branch = std::get_if<SingleBranch>(block.branch.get());
+ auto* branch = std::get_if<SingleBranch>(block.branch.get());
if (!branch->condition.IsUnconditional()) {
q2.address = block.end + 1;
state.queries.push_back(q2);
}
- Query conditional_query{q2};
+ auto& conditional_query = state.queries.emplace_back(q2);
if (branch->is_sync) {
if (branch->address == unassigned_branch) {
branch->address = conditional_query.ssy_stack.top();
@@ -567,21 +567,21 @@ bool TryQuery(CFGRebuildState& state) {
conditional_query.pbk_stack.pop();
}
conditional_query.address = branch->address;
- state.queries.push_back(std::move(conditional_query));
return true;
}
- const auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
+
+ const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get());
for (const auto& branch_case : multi_branch->branches) {
- Query conditional_query{q2};
+ auto& conditional_query = state.queries.emplace_back(q2);
conditional_query.address = branch_case.address;
- state.queries.push_back(std::move(conditional_query));
}
+
return true;
}
void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
- const auto get_expr = ([&](const Condition& cond) -> Expr {
- Expr result{};
+ const auto get_expr = [](const Condition& cond) -> Expr {
+ Expr result;
if (cond.cc != ConditionCode::T) {
result = MakeExpr<ExprCondCode>(cond.cc);
}
@@ -594,10 +594,10 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
}
Expr extra = MakeExpr<ExprPredicate>(pred);
if (negate) {
- extra = MakeExpr<ExprNot>(extra);
+ extra = MakeExpr<ExprNot>(std::move(extra));
}
if (result) {
- return MakeExpr<ExprAnd>(extra, result);
+ return MakeExpr<ExprAnd>(std::move(extra), std::move(result));
}
return extra;
}
@@ -605,9 +605,10 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
return result;
}
return MakeExpr<ExprBoolean>(true);
- });
+ };
+
if (std::holds_alternative<SingleBranch>(*branch_info)) {
- const auto branch = std::get_if<SingleBranch>(branch_info.get());
+ const auto* branch = std::get_if<SingleBranch>(branch_info.get());
if (branch->address < 0) {
if (branch->kill) {
mm.InsertReturn(get_expr(branch->condition), true);
@@ -619,7 +620,7 @@ void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
mm.InsertGoto(get_expr(branch->condition), branch->address);
return;
}
- const auto multi_branch = std::get_if<MultiBranch>(branch_info.get());
+ const auto* multi_branch = std::get_if<MultiBranch>(branch_info.get());
for (const auto& branch_case : multi_branch->branches) {
mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value),
branch_case.address);
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index a276aee44..88103fede 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -53,6 +53,9 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
absolute_a = ((instr.value >> 44) & 1) != 0;
absolute_b = ((instr.value >> 54) & 1) != 0;
break;
+ default:
+ UNREACHABLE();
+ break;
}
Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
diff --git a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
index 73880db0e..2a30aab2b 100644
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@@ -28,23 +28,26 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
case OpCode::Id::IADD32I: {
UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");
- op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd32i.negate_a, true);
+ op_a = GetOperandAbsNegInteger(std::move(op_a), false, instr.iadd32i.negate_a != 0, true);
- const Node value = Operation(OperationCode::IAdd, PRECISE, op_a, op_b);
+ Node value = Operation(OperationCode::IAdd, PRECISE, std::move(op_a), std::move(op_b));
- SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc);
- SetRegister(bb, instr.gpr0, value);
+ SetInternalFlagsFromInteger(bb, value, instr.op_32.generates_cc != 0);
+ SetRegister(bb, instr.gpr0, std::move(value));
break;
}
case OpCode::Id::LOP32I: {
- if (instr.alu.lop32i.invert_a)
- op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
+ if (instr.alu.lop32i.invert_a) {
+ op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_a));
+ }
- if (instr.alu.lop32i.invert_b)
- op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
+ if (instr.alu.lop32i.invert_b) {
+ op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
+ }
- WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
- PredicateResultMode::None, Pred::UnusedIndex, instr.op_32.generates_cc);
+ WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(op_a),
+ std::move(op_b), PredicateResultMode::None, Pred::UnusedIndex,
+ instr.op_32.generates_cc != 0);
break;
}
default:
@@ -58,14 +61,14 @@ u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a,
Node op_b, PredicateResultMode predicate_mode, Pred predicate,
bool sets_cc) {
- const Node result = [&]() {
+ Node result = [&] {
switch (logic_op) {
case LogicOperation::And:
- return Operation(OperationCode::IBitwiseAnd, PRECISE, op_a, op_b);
+ return Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b));
case LogicOperation::Or:
- return Operation(OperationCode::IBitwiseOr, PRECISE, op_a, op_b);
+ return Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b));
case LogicOperation::Xor:
- return Operation(OperationCode::IBitwiseXor, PRECISE, op_a, op_b);
+ return Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b));
case LogicOperation::PassB:
return op_b;
default:
@@ -84,8 +87,8 @@ void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation
return;
case PredicateResultMode::NotZero: {
// Set the predicate to true if the result is not zero.
- const Node compare = Operation(OperationCode::LogicalINotEqual, result, Immediate(0));
- SetPredicate(bb, static_cast<u64>(predicate), compare);
+ Node compare = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0));
+ SetPredicate(bb, static_cast<u64>(predicate), std::move(compare));
break;
}
default:
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index e75ca4fdb..618d309d2 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -119,6 +119,8 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
return descriptor.r_type;
}
break;
+ default:
+ break;
}
UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
return ComponentType::FLOAT;
@@ -220,9 +222,10 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
return (component == 0 || component == 1) ? 8 : 0;
case TextureFormat::G4R4:
return (component == 0 || component == 1) ? 4 : 0;
+ default:
+ UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
+ return 0;
}
- UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
- return 0;
}
std::size_t GetImageComponentMask(TextureFormat format) {
@@ -257,9 +260,10 @@ std::size_t GetImageComponentMask(TextureFormat format) {
case TextureFormat::R8:
case TextureFormat::R1:
return std::size_t{R};
+ default:
+ UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
+ return std::size_t{R | G | B | A};
}
- UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
- return std::size_t{R | G | B | A};
}
std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
@@ -463,7 +467,10 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
return OperationCode::AtomicImageXor;
case Tegra::Shader::ImageAtomicOperation::Exch:
return OperationCode::AtomicImageExchange;
+ default:
+ break;
}
+ break;
default:
break;
}
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index e4739394d..e2bba88dd 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -386,7 +386,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
break;
}
case OpCode::Id::RED: {
- UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32);
+ UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}",
+ static_cast<int>(instr.red.type.Value()));
const auto [real_address, base_address, descriptor] =
TrackGlobalMemory(bb, instr, true, true);
if (!real_address || !base_address) {
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 29ebf65ba..4e932a4b6 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -292,33 +292,36 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
break;
}
- std::vector<Node> coords;
-
- // TODO: Add coordinates for different samplers once other texture types are implemented.
- switch (texture_type) {
- case TextureType::Texture1D:
- coords.push_back(GetRegister(instr.gpr8));
- break;
- case TextureType::Texture2D:
- coords.push_back(GetRegister(instr.gpr8.Value() + 0));
- coords.push_back(GetRegister(instr.gpr8.Value() + 1));
- break;
- default:
- UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<int>(texture_type));
+ const u64 base_index = is_array ? 1 : 0;
+ const u64 num_components = [texture_type] {
+ switch (texture_type) {
+ case TextureType::Texture1D:
+ return 1;
+ case TextureType::Texture2D:
+ return 2;
+ case TextureType::TextureCube:
+ return 3;
+ default:
+ UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<int>(texture_type));
+ return 2;
+ }
+ }();
+ // TODO: What's the array component used for?
- // Fallback to interpreting as a 2D texture for now
- coords.push_back(GetRegister(instr.gpr8.Value() + 0));
- coords.push_back(GetRegister(instr.gpr8.Value() + 1));
+ std::vector<Node> coords;
+ coords.reserve(num_components);
+ for (u64 component = 0; component < num_components; ++component) {
+ coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component));
}
+
u32 indexer = 0;
for (u32 element = 0; element < 2; ++element) {
if (!instr.tmml.IsComponentEnabled(element)) {
continue;
}
- auto params = coords;
MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
- const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
- SetTemporary(bb, indexer++, value);
+ Node value = Operation(OperationCode::TextureQueryLod, meta, coords);
+ SetTemporary(bb, indexer++, std::move(value));
}
for (u32 i = 0; i < indexer; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
@@ -763,7 +766,7 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
const auto texture_type{instr.tld.texture_type};
- const bool is_array{instr.tld.is_array};
+ const bool is_array{instr.tld.is_array != 0};
const bool lod_enabled{instr.tld.GetTextureProcessMode() == TextureProcessMode::LL};
const std::size_t coord_count{GetCoordCount(texture_type)};
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp
index 5071c83ca..e18ccba8e 100644
--- a/src/video_core/shader/memory_util.cpp
+++ b/src/video_core/shader/memory_util.cpp
@@ -16,11 +16,10 @@
namespace VideoCommon::Shader {
-GPUVAddr GetShaderAddress(Core::System& system,
+GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) {
- const auto& gpu{system.GPU().Maxwell3D()};
- const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
- return gpu.regs.code_address.CodeAddress() + shader_config.offset;
+ const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]};
+ return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset;
}
bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h
index be90d24fd..4624d38e6 100644
--- a/src/video_core/shader/memory_util.h
+++ b/src/video_core/shader/memory_util.h
@@ -11,10 +11,6 @@
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
-namespace Core {
-class System;
-}
-
namespace Tegra {
class MemoryManager;
}
@@ -27,7 +23,7 @@ constexpr u32 STAGE_MAIN_OFFSET = 10;
constexpr u32 KERNEL_MAIN_OFFSET = 0;
/// Gets the address for the specified shader stage program
-GPUVAddr GetShaderAddress(Core::System& system,
+GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
Tegra::Engines::Maxwell3D::Regs::ShaderProgram program);
/// Gets if the current instruction offset is a scheduler instruction
diff --git a/src/video_core/shader/registry.cpp b/src/video_core/shader/registry.cpp
index cdf274e54..148d91fcb 100644
--- a/src/video_core/shader/registry.cpp
+++ b/src/video_core/shader/registry.cpp
@@ -24,44 +24,45 @@ GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterfac
if (shader_stage == ShaderType::Compute) {
return {};
}
- auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine);
-
- GraphicsInfo info;
- info.tfb_layouts = graphics.regs.tfb_layouts;
- info.tfb_varying_locs = graphics.regs.tfb_varying_locs;
- info.primitive_topology = graphics.regs.draw.topology;
- info.tessellation_primitive = graphics.regs.tess_mode.prim;
- info.tessellation_spacing = graphics.regs.tess_mode.spacing;
- info.tfb_enabled = graphics.regs.tfb_enabled;
- info.tessellation_clockwise = graphics.regs.tess_mode.cw;
- return info;
+
+ auto& graphics = dynamic_cast<Tegra::Engines::Maxwell3D&>(engine);
+
+ return {
+ .tfb_layouts = graphics.regs.tfb_layouts,
+ .tfb_varying_locs = graphics.regs.tfb_varying_locs,
+ .primitive_topology = graphics.regs.draw.topology,
+ .tessellation_primitive = graphics.regs.tess_mode.prim,
+ .tessellation_spacing = graphics.regs.tess_mode.spacing,
+ .tfb_enabled = graphics.regs.tfb_enabled != 0,
+ .tessellation_clockwise = graphics.regs.tess_mode.cw.Value() != 0,
+ };
}
ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
if (shader_stage != ShaderType::Compute) {
return {};
}
- auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine);
+
+ auto& compute = dynamic_cast<Tegra::Engines::KeplerCompute&>(engine);
const auto& launch = compute.launch_description;
- ComputeInfo info;
- info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z};
- info.local_memory_size_in_words = launch.local_pos_alloc;
- info.shared_memory_size_in_words = launch.shared_alloc;
- return info;
+ return {
+ .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z},
+ .shared_memory_size_in_words = launch.shared_alloc,
+ .local_memory_size_in_words = launch.local_pos_alloc,
+ };
}
} // Anonymous namespace
-Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info)
+Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info)
: stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
-Registry::Registry(Tegra::Engines::ShaderType shader_stage,
- Tegra::Engines::ConstBufferEngineInterface& engine)
- : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()},
- graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo(
- shader_stage, engine)} {}
+Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_)
+ : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()},
+ graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo(
+ shader_stage, engine_)} {}
Registry::~Registry() = default;
@@ -113,8 +114,7 @@ std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler
return value;
}
-std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
- u32 offset) {
+std::optional<SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) {
const std::pair key = {buffer, offset};
const auto iter = bindless_samplers.find(key);
if (iter != bindless_samplers.end()) {
diff --git a/src/video_core/shader/registry.h b/src/video_core/shader/registry.h
index 231206765..4bebefdde 100644
--- a/src/video_core/shader/registry.h
+++ b/src/video_core/shader/registry.h
@@ -94,7 +94,7 @@ public:
explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
explicit Registry(Tegra::Engines::ShaderType shader_stage,
- Tegra::Engines::ConstBufferEngineInterface& engine);
+ Tegra::Engines::ConstBufferEngineInterface& engine_);
~Registry();
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index d5ed81442..6be3ea92b 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -205,12 +205,12 @@ std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code,
const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1);
const auto& found = result.first;
if (!found) {
- return {};
+ return std::nullopt;
}
if (const auto immediate = std::get_if<ImmediateNode>(&*found)) {
return immediate->GetValue();
}
- return {};
+ return std::nullopt;
}
std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index dfcf36e0b..b44c09d71 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -115,20 +115,24 @@ std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap(
if (gpu_addr == candidate_gpu_addr) {
return {{0, 0}};
}
+
if (candidate_gpu_addr < gpu_addr) {
- return {};
+ return std::nullopt;
}
+
const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)};
const auto layer{static_cast<u32>(relative_address / layer_size)};
if (layer >= params.depth) {
- return {};
+ return std::nullopt;
}
+
const GPUVAddr mipmap_address = relative_address - layer_size * layer;
const auto mipmap_it =
Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);
if (mipmap_it == mipmap_offsets.end()) {
- return {};
+ return std::nullopt;
}
+
const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))};
return std::make_pair(layer, level);
}
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index e614a92df..e8515321b 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -163,13 +163,11 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl
return params;
}
-SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) {
- const auto& regs = system.GPU().Maxwell3D().regs;
-
+SurfaceParams SurfaceParams::CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d) {
+ const auto& regs = maxwell3d.regs;
const auto block_depth = std::min(regs.zeta.memory_layout.block_depth.Value(), 5U);
const bool is_layered = regs.zeta_layers > 1 && block_depth == 0;
const auto pixel_format = PixelFormatFromDepthFormat(regs.zeta.format);
-
return {
.is_tiled = regs.zeta.memory_layout.type ==
Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear,
@@ -191,8 +189,9 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) {
};
}
-SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) {
- const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
+SurfaceParams SurfaceParams::CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
+ std::size_t index) {
+ const auto& config{maxwell3d.regs.rt[index]};
SurfaceParams params;
params.is_tiled =
config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index 118aa689e..4466c3c34 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -33,10 +33,11 @@ public:
const VideoCommon::Shader::Image& entry);
/// Creates SurfaceCachedParams for a depth buffer configuration.
- static SurfaceParams CreateForDepthBuffer(Core::System& system);
+ static SurfaceParams CreateForDepthBuffer(Tegra::Engines::Maxwell3D& maxwell3d);
/// Creates SurfaceCachedParams from a framebuffer configuration.
- static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);
+ static SurfaceParams CreateForFramebuffer(Tegra::Engines::Maxwell3D& maxwell3d,
+ std::size_t index);
/// Creates SurfaceCachedParams from a Fermi2D surface configuration.
static SurfaceParams CreateForFermiCopySurface(
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 96c4e4cc2..ea835c59f 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -135,8 +135,7 @@ public:
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
@@ -160,8 +159,7 @@ public:
if (!gpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
return GetNullSurface(SurfaceParams::ExpectedTarget(entry));
}
@@ -183,11 +181,11 @@ public:
TView GetDepthBufferSurface(bool preserve_contents) {
std::lock_guard lock{mutex};
- auto& maxwell3d = system.GPU().Maxwell3D();
- if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
+ auto& dirty = maxwell3d.dirty;
+ if (!dirty.flags[VideoCommon::Dirty::ZetaBuffer]) {
return depth_buffer.view;
}
- maxwell3d.dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false;
+ dirty.flags[VideoCommon::Dirty::ZetaBuffer] = false;
const auto& regs{maxwell3d.regs};
const auto gpu_addr{regs.zeta.Address()};
@@ -195,13 +193,12 @@ public:
SetEmptyDepthBuffer();
return {};
}
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
SetEmptyDepthBuffer();
return {};
}
- const auto depth_params{SurfaceParams::CreateForDepthBuffer(system)};
+ const auto depth_params{SurfaceParams::CreateForDepthBuffer(maxwell3d)};
auto surface_view = GetSurface(gpu_addr, *cpu_addr, depth_params, preserve_contents, true);
if (depth_buffer.target)
depth_buffer.target->MarkAsRenderTarget(false, NO_RT);
@@ -215,7 +212,6 @@ public:
TView GetColorBufferSurface(std::size_t index, bool preserve_contents) {
std::lock_guard lock{mutex};
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
- auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.flags[VideoCommon::Dirty::ColorBuffer0 + index]) {
return render_targets[index].view;
}
@@ -235,15 +231,14 @@ public:
return {};
}
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
SetEmptyColorBuffer(index);
return {};
}
auto surface_view =
- GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
+ GetSurface(gpu_addr, *cpu_addr, SurfaceParams::CreateForFramebuffer(maxwell3d, index),
preserve_contents, true);
if (render_targets[index].target) {
auto& surface = render_targets[index].target;
@@ -300,9 +295,8 @@ public:
const GPUVAddr dst_gpu_addr = dst_config.Address();
DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
- const auto& memory_manager = system.GPU().MemoryManager();
- const std::optional<VAddr> dst_cpu_addr = memory_manager.GpuToCpuAddress(dst_gpu_addr);
- const std::optional<VAddr> src_cpu_addr = memory_manager.GpuToCpuAddress(src_gpu_addr);
+ const std::optional<VAddr> dst_cpu_addr = gpu_memory.GpuToCpuAddress(dst_gpu_addr);
+ const std::optional<VAddr> src_cpu_addr = gpu_memory.GpuToCpuAddress(src_gpu_addr);
std::pair dst_surface = GetSurface(dst_gpu_addr, *dst_cpu_addr, dst_params, true, false);
TView src_surface = GetSurface(src_gpu_addr, *src_cpu_addr, src_params, true, false).second;
ImageBlit(src_surface, dst_surface.second, copy_config);
@@ -358,9 +352,11 @@ public:
}
protected:
- explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- bool is_astc_supported)
- : system{system}, is_astc_supported{is_astc_supported}, rasterizer{rasterizer} {
+ explicit TextureCache(VideoCore::RasterizerInterface& rasterizer_,
+ Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
+ bool is_astc_supported_)
+ : is_astc_supported{is_astc_supported_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
+ gpu_memory{gpu_memory_} {
for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
SetEmptyColorBuffer(i);
}
@@ -395,7 +391,7 @@ protected:
virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
void ManageRenderTargetUnregister(TSurface& surface) {
- auto& dirty = system.GPU().Maxwell3D().dirty;
+ auto& dirty = maxwell3d.dirty;
const u32 index = surface->GetRenderTarget();
if (index == DEPTH_RT) {
dirty.flags[VideoCommon::Dirty::ZetaBuffer] = true;
@@ -408,8 +404,7 @@ protected:
void Register(TSurface surface) {
const GPUVAddr gpu_addr = surface->GetGpuAddr();
const std::size_t size = surface->GetSizeInBytes();
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
gpu_addr);
@@ -459,7 +454,6 @@ protected:
return new_surface;
}
- Core::System& system;
const bool is_astc_supported;
private:
@@ -954,8 +948,7 @@ private:
* @param params The parameters on the candidate surface.
**/
Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
- const std::optional<VAddr> cpu_addr =
- system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
+ const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
if (!cpu_addr) {
Deduction result{};
@@ -1112,7 +1105,7 @@ private:
void LoadSurface(const TSurface& surface) {
staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
- surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache);
+ surface->LoadBuffer(gpu_memory, staging_cache);
surface->UploadTexture(staging_cache.GetBuffer(0));
surface->MarkAsModified(false, Tick());
}
@@ -1123,7 +1116,7 @@ private:
}
staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
surface->DownloadTexture(staging_cache.GetBuffer(0));
- surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache);
+ surface->FlushBuffer(gpu_memory, staging_cache);
surface->MarkAsModified(false, Tick());
}
@@ -1253,6 +1246,8 @@ private:
}
VideoCore::RasterizerInterface& rasterizer;
+ Tegra::Engines::Maxwell3D& maxwell3d;
+ Tegra::MemoryManager& gpu_memory;
FormatLookupTable format_lookup_table;
FormatCompatibility format_compatibility;
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 45f360bdd..a14df06a3 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include <memory>
+
#include "common/logging/log.h"
#include "core/core.h"
#include "core/settings.h"
@@ -16,37 +17,49 @@
#include "video_core/video_core.h"
namespace {
-std::unique_ptr<VideoCore::RendererBase> CreateRenderer(Core::Frontend::EmuWindow& emu_window,
- Core::System& system,
- Core::Frontend::GraphicsContext& context) {
+
+std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
+ Core::System& system, Core::Frontend::EmuWindow& emu_window, Tegra::GPU& gpu,
+ std::unique_ptr<Core::Frontend::GraphicsContext> context) {
+ auto& telemetry_session = system.TelemetrySession();
+ auto& cpu_memory = system.Memory();
+
switch (Settings::values.renderer_backend.GetValue()) {
case Settings::RendererBackend::OpenGL:
- return std::make_unique<OpenGL::RendererOpenGL>(emu_window, system, context);
+ return std::make_unique<OpenGL::RendererOpenGL>(telemetry_session, emu_window, cpu_memory,
+ gpu, std::move(context));
#ifdef HAS_VULKAN
case Settings::RendererBackend::Vulkan:
- return std::make_unique<Vulkan::RendererVulkan>(emu_window, system);
+ return std::make_unique<Vulkan::RendererVulkan>(telemetry_session, emu_window, cpu_memory,
+ gpu, std::move(context));
#endif
default:
return nullptr;
}
}
+
} // Anonymous namespace
namespace VideoCore {
std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
+ std::unique_ptr<Tegra::GPU> gpu;
+ if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
+ gpu = std::make_unique<VideoCommon::GPUAsynch>(system);
+ } else {
+ gpu = std::make_unique<VideoCommon::GPUSynch>(system);
+ }
+
auto context = emu_window.CreateSharedContext();
const auto scope = context->Acquire();
- auto renderer = CreateRenderer(emu_window, system, *context);
+
+ auto renderer = CreateRenderer(system, emu_window, *gpu, std::move(context));
if (!renderer->Init()) {
return nullptr;
}
- if (Settings::values.use_asynchronous_gpu_emulation.GetValue()) {
- return std::make_unique<VideoCommon::GPUAsynch>(system, std::move(renderer),
- std::move(context));
- }
- return std::make_unique<VideoCommon::GPUSynch>(system, std::move(renderer), std::move(context));
+ gpu->BindRenderer(std::move(renderer));
+ return gpu;
}
u16 GetResolutionScaleFactor(const RendererBase& renderer) {