// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <memory>
#include <mutex>
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/control/channel_state.h"
#include "video_core/engines/draw_manager.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/host1x/gpu_device_memory_manager.h"
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader_cache.h"
#include "video_core/texture_cache/texture_cache_base.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using MaxwellDrawState = Tegra::Engines::DrawManager::State;
using VideoCommon::ImageViewId;
using VideoCommon::ImageViewType;
MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192));
MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128));
MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128));
namespace {
struct DrawParams {
u32 base_instance;
u32 num_instances;
u32 base_vertex;
u32 num_vertices;
u32 first_index;
bool is_indexed;
};
VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t index, float scale) {
const auto& src = regs.viewport_transform[index];
const auto conv = [scale](float value) {
float new_value = value * scale;
if (scale < 1.0f) {
const bool sign = std::signbit(value);
new_value = std::round(std::abs(new_value));
new_value = sign ? -new_value : new_value;
}
return new_value;
};
const float x = conv(src.translate_x - src.scale_x);
const float width = conv(src.scale_x * 2.0f);
float y = conv(src.translate_y - src.scale_y);
float height = conv(src.scale_y * 2.0f);
const bool lower_left = regs.window_origin.mode != Maxwell::WindowOrigin::Mode::UpperLeft;
const bool y_negate = !device.IsNvViewportSwizzleSupported() &&
src.swizzle.y == Maxwell::ViewportSwizzle::NegativeY;
if (lower_left) {
// Flip by surface clip height
y += conv(static_cast<f32>(regs.surface_clip.height));
height = -height;
}
if (y_negate) {
// Flip by viewport height
y += height;
height = -height;
}
const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1.0f : 0.0f;
VkViewport viewport{
.x = x,
.y = y,
.width = width != 0.0f ? width : 1.0f,
.height = height != 0.0f ? height : 1.0f,
.minDepth = src.translate_z - src.scale_z * reduce_z,
.maxDepth = src.translate_z + src.scale_z,
};
if (!device.IsExtDepthRangeUnrestrictedSupported()) {
viewport.minDepth = std::clamp(viewport.minDepth, 0.0f, 1.0f);
viewport.maxDepth = std::clamp(viewport.maxDepth, 0.0f, 1.0f);
}
return viewport;
}
VkRect2D GetScissorState(const Maxwell& regs, size_t index, u32 up_scale = 1, u32 down_shift = 0) {
const auto& src = regs.scissor_test[index];
VkRect2D scissor;
const auto scale_up = [&](s32 value) -> s32 {
if (value == 0) {
return 0U;
}
const s32 upset = value * up_scale;
s32 acumm = 0;
if ((up_scale >> down_shift) == 0) {
acumm = upset % 2;
}
const s32 converted_value = (value * up_scale) >> down_shift;
return value < 0 ? std::min<s32>(converted_value - acumm, -1)
: std::max<s32>(converted_value + acumm, 1);
};
const bool lower_left = regs.window_origin.mode != Maxwell::WindowOrigin::Mode::UpperLeft;
const s32 clip_height = regs.surface_clip.height;
// Flip coordinates if lower left
s32 min_y = lower_left ? (clip_height - src.max_y) : src.min_y.Value();
s32 max_y = lower_left ? (clip_height - src.min_y) : src.max_y.Value();
// Bound to render area
min_y = std::max(min_y, 0);
max_y = std::max(max_y, 0);
if (src.enable) {
scissor.offset.x = scale_up(src.min_x);
scissor.offset.y = scale_up(min_y);
scissor.extent.width = scale_up(src.max_x - src.min_x);
scissor.extent.height = scale_up(max_y - min_y);
} else {
scissor.offset.x = 0;
scissor.offset.y = 0;
scissor.extent.width = std::numeric_limits<s32>::max();
scissor.extent.height = std::numeric_limits<s32>::max();
}
return scissor;
}
DrawParams MakeDrawParams(const MaxwellDrawState& draw_state, u32 num_instances, bool is_indexed) {
DrawParams params{
.base_instance = draw_state.base_instance,
.num_instances = num_instances,
.base_vertex = is_indexed ? draw_state.base_index : draw_state.vertex_buffer.first,
.num_vertices = is_indexed ? draw_state.index_buffer.count : draw_state.vertex_buffer.count,
.first_index = is_indexed ? draw_state.index_buffer.first : 0,
.is_indexed = is_indexed,
};
// 6 triangle vertices per quad, base vertex is part of the index
// See BindQuadIndexBuffer for more details
if (draw_state.topology == Maxwell::PrimitiveTopology::Quads) {
params.num_vertices = (params.num_vertices / 4) * 6;
params.base_vertex = 0;
params.is_indexed = true;
} else if (draw_state.topology == Maxwell::PrimitiveTopology::QuadStrip) {
params.num_vertices = (params.num_vertices - 2) / 2 * 6;
params.base_vertex = 0;
params.is_indexed = true;
}
return params;
}
} // Anonymous namespace
RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Tegra::MaxwellDeviceMemoryManager& device_memory_,
const Device& device_, MemoryAllocator& memory_allocator_,
StateTracker& state_tracker_, Scheduler& scheduler_)
: gpu{gpu_}, device_memory{device_memory_}, device{device_},
memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_},
staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
guest_descriptor_queue(device, scheduler), compute_pass_descriptor_queue(device, scheduler),
blit_image(device, scheduler, state_tracker, descriptor_pool), render_pass_cache(device),
texture_cache_runtime{
device, scheduler, memory_allocator, staging_pool,
blit_image, render_pass_cache, descriptor_pool, compute_pass_descriptor_queue},
texture_cache(texture_cache_runtime, device_memory),
buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool),
buffer_cache(device_memory, buffer_cache_runtime),
query_cache_runtime(this, device_memory, buffer_cache, device, memory_allocator, scheduler,
staging_pool, compute_pass_descriptor_queue, descriptor_pool),
query_cache(gpu, *this, device_memory, query_cache_runtime),
pipeline_cache(device_memory, device, scheduler, descriptor_pool, guest_descriptor_queue,
render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
accelerate_dma(buffer_cache, texture_cache, scheduler),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
wfi_event(device.GetLogical().CreateEvent()) {
scheduler.SetQueryCache(query_cache);
}
RasterizerVulkan::~RasterizerVulkan() = default;
template <typename Func>
void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
MICROPROFILE_SCOPE(Vulkan_Drawing);
SCOPE_EXIT {
gpu.TickWork();
};
FlushWork();
gpu_memory->FlushCaching();
query_cache.NotifySegment(true);
GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
if (!pipeline) {
return;
}
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
// update engine as channel may be different.
pipeline->SetEngine(maxwell3d, gpu_memory);
pipeline->Configure(is_indexed);
UpdateDynamicStates();
HandleTransformFeedback();
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
maxwell3d->regs.zpass_pixel_count_enable);
draw_func();
}
void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
PrepareDraw(is_indexed, [this, is_indexed, instance_count] {
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
const u32 num_instances{instance_count};
const DrawParams draw_params{MakeDrawParams(draw_state, num_instances, is_indexed)};
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
if (draw_params.is_indexed) {
cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances,
draw_params.first_index, draw_params.base_vertex,
draw_params.base_instance);
} else {
cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
draw_params.base_vertex, draw_params.base_instance);
}
});
});
}
void RasterizerVulkan::DrawIndirect() {
const auto& params = maxwell3d->draw_manager->GetIndirectParams();
buffer_cache.SetDrawIndirect(¶ms);
PrepareDraw(params.is_indexed, [this, ¶ms] {
const auto indirect_buffer = buffer_cache.GetDrawIndirectBuffer();
const auto& buffer = indirect_buffer.first;
const auto& offset = indirect_buffer.second;
if (params.is_byte_count) {
scheduler.Record([buffer_obj = buffer->Handle(), offset,
stride = params.stride](vk::CommandBuffer cmdbuf) {
cmdbuf.DrawIndirectByteCountEXT(1, 0, buffer_obj, offset, 0,
static_cast<u32>(stride));
});
return;
}
if (params.include_count) {
const auto count = buffer_cache.GetDrawIndirectCount();
const auto& draw_buffer = count.first;
const auto& offset_base = count.second;
scheduler.Record([draw_buffer_obj = draw_buffer->Handle(),
buffer_obj = buffer->Handle(), offset_base, offset,
params](vk::CommandBuffer cmdbuf) {
if (params.is_indexed) {
cmdbuf.DrawIndexedIndirectCount(
buffer_obj, offset, draw_buffer_obj, offset_base,
static_cast<u32>(params.max_draw_counts), static_cast<u32>(params.stride));
} else {
cmdbuf.DrawIndirectCount(buffer_obj, offset, draw_buffer_obj, offset_base,
static_cast<u32>(params.max_draw_counts),
static_cast<u32>(params.stride));
}
});
return;
}
scheduler.Record([buffer_obj = buffer->Handle(), offset, params](vk::CommandBuffer cmdbuf) {
if (params.is_indexed) {
cmdbuf.DrawIndexedIndirect(buffer_obj, offset,
static_cast<u32>(params.max_draw_counts),
static_cast<u32>(params.stride));
} else {
cmdbuf.DrawIndirect(buffer_obj, offset, static_cast<u32>(params.max_draw_counts),
static_cast<u32>(params.stride));
}
});
});
buffer_cache.SetDrawIndirect(nullptr);
}
void RasterizerVulkan::DrawTexture() {
MICROPROFILE_SCOPE(Vulkan_Drawing);
SCOPE_EXIT {
gpu.TickWork();
};
FlushWork();
query_cache.NotifySegment(true);
std::scoped_lock l{texture_cache.mutex};
texture_cache.SynchronizeGraphicsDescriptors();
texture_cache.UpdateRenderTargets(false);
UpdateDynamicStates();
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
maxwell3d->regs.zpass_pixel_count_enable);
const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();
const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler);
const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
const auto* framebuffer = texture_cache.GetFramebuffer();
const bool src_rescaling = texture_cache.IsRescaling() && texture.IsRescaled();
const bool dst_rescaling = texture_cache.IsRescaling() && framebuffer->IsRescaled();
const auto ScaleSrc = [&](auto dim_f) -> s32 {
auto dim = static_cast<s32>(dim_f);
return src_rescaling ? Settings::values.resolution_info.ScaleUp(dim) : dim;
};
const auto ScaleDst = [&](auto dim_f) -> s32 {
auto dim = static_cast<s32>(dim_f);
return dst_rescaling ? Settings::values.resolution_info.ScaleUp(dim) : dim;
};
Region2D dst_region = {Offset2D{.x = ScaleDst(draw_texture_state.dst_x0),
.y = ScaleDst(draw_texture_state.dst_y0)},
Offset2D{.x = ScaleDst(draw_texture_state.dst_x1),
.y = ScaleDst(draw_texture_state.dst_y1)}};
Region2D src_region = {Offset2D{.x = ScaleSrc(draw_texture_state.src_x0),
.y = ScaleSrc(draw_texture_state.src_y0)},
Offset2D{.x = ScaleSrc(draw_texture_state.src_x1),
.y = ScaleSrc(draw_texture_state.src_y1)}};
Extent3D src_size = {static_cast<u32>(ScaleSrc(texture.size.width)),
static_cast<u32>(ScaleSrc(texture.size.height)), texture.size.depth};
blit_image.BlitColor(framebuffer, texture.RenderTarget(), texture.ImageHandle(),
sampler->Handle(), dst_region, src_region, src_size);
}
void RasterizerVulkan::Clear(u32 layer_count) {
MICROPROFILE_SCOPE(Vulkan_Clearing);
FlushWork();
gpu_memory->FlushCaching();
query_cache.NotifySegment(true);
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
maxwell3d->regs.zpass_pixel_count_enable);
auto& regs = maxwell3d->regs;
const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B ||
regs.clear_surface.A;
const bool use_depth = regs.clear_surface.Z;
const bool use_stencil = regs.clear_surface.S;
if (!use_color && !use_depth && !use_stencil) {
return;
}
std::scoped_lock lock{texture_cache.mutex};
texture_cache.UpdateRenderTargets(true);
const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
const VkExtent2D render_area = framebuffer->RenderArea();
scheduler.RequestRenderpass(framebuffer);
u32 up_scale = 1;
u32 down_shift = 0;
if (texture_cache.IsRescaling()) {
up_scale = Settings::values.resolution_info.up_scale;
down_shift = Settings::values.resolution_info.down_shift;
}
UpdateViewportsState(regs);
VkRect2D default_scissor;
default_scissor.offset.x = 0;
default_scissor.offset.y = 0;
default_scissor.extent.width = std::numeric_limits<s32>::max();
default_scissor.extent.height = std::numeric_limits<s32>::max();
VkClearRect clear_rect{
.rect = regs.clear_control.use_scissor ? GetScissorState(regs, 0, up_scale, down_shift)
: default_scissor,
.baseArrayLayer = regs.clear_surface.layer,
.layerCount = layer_count,
};
if (clear_rect.rect.extent.width == 0 || clear_rect.rect.extent.height == 0) {
return;
}
clear_rect.rect.extent = VkExtent2D{
.width = std::min(clear_rect.rect.extent.width, render_area.width),
.height = std::min(clear_rect.rect.extent.height, render_area.height),
};
const u32 color_attachment = regs.clear_surface.RT;
if (use_color && framebuffer->HasAspectColorBit(color_attachment)) {
const auto format =
VideoCore::Surface::PixelFormatFromRenderTargetFormat(regs.rt[color_attachment].format);
bool is_integer = IsPixelFormatInteger(format);
bool is_signed = IsPixelFormatSignedInteger(format);
size_t int_size = PixelComponentSizeBitsInteger(format);
VkClearValue clear_value{};
if (!is_integer) {
std::memcpy(clear_value.color.float32, regs.clear_color.data(),
regs.clear_color.size() * sizeof(f32));
} else if (!is_signed) {
for (size_t i = 0; i < 4; i++) {
clear_value.color.uint32[i] = static_cast<u32>(
static_cast<f32>(static_cast<u64>(int_size) << 1U) * regs.clear_color[i]);
}
} else {
for (size_t i = 0; i < 4; i++) {
clear_value.color.int32[i] =
static_cast<s32>(static_cast<f32>(static_cast<s64>(int_size - 1) << 1) *
(regs.clear_color[i] - 0.5f));
}
}
if (regs.clear_surface.R && regs.clear_surface.G && regs.clear_surface.B &&
regs.clear_surface.A) {
scheduler.Record([color_attachment, clear_value, clear_rect](vk::CommandBuffer cmdbuf) {
const VkClearAttachment attachment{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.colorAttachment = color_attachment,
.clearValue = clear_value,
};
cmdbuf.ClearAttachments(attachment, clear_rect);
});
} else {
u8 color_mask = static_cast<u8>(regs.clear_surface.R | regs.clear_surface.G << 1 |
regs.clear_surface.B << 2 | regs.clear_surface.A << 3);
Region2D dst_region = {
Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y},
Offset2D{.x = clear_rect.rect.offset.x +
static_cast<s32>(clear_rect.rect.extent.width),
.y = clear_rect.rect.offset.y +
static_cast<s32>(clear_rect.rect.extent.height)}};
blit_image.ClearColor(framebuffer, color_mask, regs.clear_color, dst_region);
}
}
if (!use_depth && !use_stencil) {
return;
}
VkImageAspectFlags aspect_flags = 0;
if (use_depth && framebuffer->HasAspectDepthBit()) {
aspect_flags |= VK_IMAGE_ASPECT_DEPTH_BIT;
}
if (use_stencil && framebuffer->HasAspectStencilBit()) {
aspect_flags |= VK_IMAGE_ASPECT_STENCIL_BIT;
}
if (aspect_flags == 0) {
return;
}
if (use_stencil && framebuffer->HasAspectStencilBit() && regs.stencil_front_mask != 0xFF &&
regs.stencil_front_mask != 0) {
Region2D dst_region = {
Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y},
Offset2D{.x = clear_rect.rect.offset.x + static_cast<s32>(clear_rect.rect.extent.width),
.y = clear_rect.rect.offset.y +
static_cast<s32>(clear_rect.rect.extent.height)}};
blit_image.ClearDepthStencil(framebuffer, use_depth, regs.clear_depth,
static_cast<u8>(regs.stencil_front_mask), regs.clear_stencil,
regs.stencil_front_func_mask, dst_region);
} else {
scheduler.Record([clear_depth = regs.clear_depth, clear_stencil = regs.clear_stencil,
clear_rect, aspect_flags](vk::CommandBuffer cmdbuf) {
VkClearAttachment attachment;
attachment.aspectMask = aspect_flags;
attachment.colorAttachment = 0;
attachment.clearValue.depthStencil.depth = clear_depth;
attachment.clearValue.depthStencil.stencil = clear_stencil;
cmdbuf.ClearAttachments(attachment, clear_rect);
});
}
}
void RasterizerVulkan::DispatchCompute() {
FlushWork();
gpu_memory->FlushCaching();
ComputePipeline* const pipeline{pipeline_cache.CurrentComputePipeline()};
if (!pipeline) {
return;
}
std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
pipeline->Configure(*kepler_compute, *gpu_memory, scheduler, buffer_cache, texture_cache);
const auto& qmd{kepler_compute->launch_description};
auto indirect_address = kepler_compute->GetIndirectComputeAddress();
if (indirect_address) {
// DispatchIndirect
static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
const auto post_op = VideoCommon::ObtainBufferOperation::DiscardWrite;
const auto [buffer, offset] =
buffer_cache.ObtainBuffer(*indirect_address, 12, sync_info, post_op);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([indirect_buffer = buffer->Handle(),
indirect_offset = offset](vk::CommandBuffer cmdbuf) {
cmdbuf.DispatchIndirect(indirect_buffer, indirect_offset);
});
return;
}
const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); });
}
void RasterizerVulkan::ResetCounter(VideoCommon::QueryType type) {
if (type != VideoCommon::QueryType::ZPassPixelCount64) {
LOG_DEBUG(Render_Vulkan, "Unimplemented counter reset={}", type);
return;
}
query_cache.CounterReset(type);
}
void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) {
query_cache.CounterReport(gpu_addr, type, flags, payload, subreport);
}
void RasterizerVulkan::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
u32 size) {
buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
}
void Vulkan::RasterizerVulkan::DisableGraphicsUniformBuffer(size_t stage, u32 index) {
buffer_cache.DisableGraphicsUniformBuffer(stage, index);
}
void RasterizerVulkan::FlushAll() {}
void RasterizerVulkan::FlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) {
if (addr == 0 || size == 0) {
return;
}
if (True(which & VideoCommon::CacheType::TextureCache)) {
std::scoped_lock lock{texture_cache.mutex};
texture_cache.DownloadMemory(addr, size);
}
if ((True(which & VideoCommon::CacheType::BufferCache))) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.DownloadMemory(addr, size);
}
if ((True(which & VideoCommon::CacheType::QueryCache))) {
query_cache.FlushRegion(addr, size);
}
}
bool RasterizerVulkan::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) {
if ((True(which & VideoCommon::CacheType::BufferCache))) {
std::scoped_lock lock{buffer_cache.mutex};
if (buffer_cache.IsRegionGpuModified(addr, size)) {
return true;
}
}
if (!Settings::IsGPULevelHigh()) {
return false;
}
if (True(which & VideoCommon::CacheType::TextureCache)) {
std::scoped_lock lock{texture_cache.mutex};
return texture_cache.IsRegionGpuModified(addr, size);
}
return false;
}
VideoCore::RasterizerDownloadArea RasterizerVulkan::GetFlushArea(DAddr addr, u64 size) {
{
std::scoped_lock lock{texture_cache.mutex};
auto area = texture_cache.GetFlushArea(addr, size);
if (area) {
return *area;
}
}
VideoCore::RasterizerDownloadArea new_area{
.start_address = Common::AlignDown(addr, Core::DEVICE_PAGESIZE),
.end_address = Common::AlignUp(addr + size, Core::DEVICE_PAGESIZE),
.preemtive = true,
};
return new_area;
}
void RasterizerVulkan::InvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType which) {
if (addr == 0 || size == 0) {
return;
}
if (True(which & VideoCommon::CacheType::TextureCache)) {
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
if ((True(which & VideoCommon::CacheType::BufferCache))) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
if ((True(which & VideoCommon::CacheType::QueryCache))) {
query_cache.InvalidateRegion(addr, size);
}
if ((True(which & VideoCommon::CacheType::ShaderCache))) {
pipeline_cache.InvalidateRegion(addr, size);
}
}
void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<DAddr, std::size_t>> sequences) {
{
std::scoped_lock lock{texture_cache.mutex};
for (const auto& [addr, size] : sequences) {
texture_cache.WriteMemory(addr, size);
}
}
{
std::scoped_lock lock{buffer_cache.mutex};
for (const auto& [addr, size] : sequences) {
buffer_cache.WriteMemory(addr, size);
}
}
{
for (const auto& [addr, size] : sequences) {
query_cache.InvalidateRegion(addr, size);
pipeline_cache.InvalidateRegion(addr, size);
}
}
}
bool RasterizerVulkan::OnCPUWrite(DAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return false;
}
{
std::scoped_lock lock{buffer_cache.mutex};
if (buffer_cache.OnCPUWrite(addr, size)) {
return true;
}
}
{
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
pipeline_cache.InvalidateRegion(addr, size);
return false;
}
void RasterizerVulkan::OnCacheInvalidation(DAddr addr, u64 size) {
if (addr == 0 || size == 0) {
return;
}
{
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
pipeline_cache.InvalidateRegion(addr, size);
}
void RasterizerVulkan::InvalidateGPUCache() {
gpu.InvalidateGPUCache();
}
void RasterizerVulkan::UnmapMemory(DAddr addr, u64 size) {
{
std::scoped_lock lock{texture_cache.mutex};
texture_cache.UnmapMemory(addr, size);
}
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
pipeline_cache.OnCacheInvalidation(addr, size);
}
void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
{
std::scoped_lock lock{texture_cache.mutex};
texture_cache.UnmapGPUMemory(as_id, addr, size);
}
}
void RasterizerVulkan::SignalFence(std::function<void()>&& func) {
fence_manager.SignalFence(std::move(func));
}
void RasterizerVulkan::SyncOperation(std::function<void()>&& func) {
fence_manager.SyncOperation(std::move(func));
}
void RasterizerVulkan::SignalSyncPoint(u32 value) {
fence_manager.SignalSyncPoint(value);
}
void RasterizerVulkan::SignalReference() {
fence_manager.SignalReference();
}
void RasterizerVulkan::ReleaseFences(bool force) {
fence_manager.WaitPendingFences(force);
}
void RasterizerVulkan::FlushAndInvalidateRegion(DAddr addr, u64 size,
VideoCommon::CacheType which) {
if (Settings::IsGPULevelExtreme()) {
FlushRegion(addr, size, which);
}
InvalidateRegion(addr, size, which);
}
void RasterizerVulkan::WaitForIdle() {
// Everything but wait pixel operations. This intentionally includes FRAGMENT_SHADER_BIT because
// fragment shaders can still write storage buffers.
VkPipelineStageFlags flags =
VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT;
if (device.IsExtTransformFeedbackSupported()) {
flags |= VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT;
}
query_cache.NotifyWFI();
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([event = *wfi_event, flags](vk::CommandBuffer cmdbuf) {
cmdbuf.SetEvent(event, flags);
cmdbuf.WaitEvents(event, flags, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, {}, {}, {});
});
fence_manager.SignalOrdering();
}
void RasterizerVulkan::FragmentBarrier() {
// We already put barriers when a render pass finishes
scheduler.RequestOutsideRenderPassOperationContext();
}
void RasterizerVulkan::TiledCacheBarrier() {
// TODO: Implementing tiled barriers requires rewriting a good chunk of the Vulkan backend
}
void RasterizerVulkan::FlushCommands() {
if (draw_counter == 0) {
return;
}
draw_counter = 0;
scheduler.Flush();
}
void RasterizerVulkan::TickFrame() {
draw_counter = 0;
guest_descriptor_queue.TickFrame();
compute_pass_descriptor_queue.TickFrame();
fence_manager.TickFrame();
staging_pool.TickFrame();
{
std::scoped_lock lock{texture_cache.mutex};
texture_cache.TickFrame();
}
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.TickFrame();
}
}
bool RasterizerVulkan::AccelerateConditionalRendering() {
gpu_memory->FlushCaching();
return query_cache.AccelerateHostConditionalRendering();
}
bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
std::scoped_lock lock{texture_cache.mutex};
return texture_cache.BlitImage(dst, src, copy_config);
}
Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() {
return accelerate_dma;
}
void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
std::span<const u8> memory) {
auto cpu_addr = gpu_memory->GpuToCpuAddress(address);
if (!cpu_addr) [[unlikely]] {
gpu_memory->WriteBlock(address, memory.data(), copy_size);
return;
}
gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size);
{
std::unique_lock<std::recursive_mutex> lock{buffer_cache.mutex};
if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
buffer_cache.WriteMemory(*cpu_addr, copy_size);
}
}
{
std::scoped_lock lock_texture{texture_cache.mutex};
texture_cache.WriteMemory(*cpu_addr, copy_size);
}
pipeline_cache.InvalidateRegion(*cpu_addr, copy_size);
query_cache.InvalidateRegion(*cpu_addr, copy_size);
}
std::optional<FramebufferTextureInfo> RasterizerVulkan::AccelerateDisplay(
const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, u32 pixel_stride) {
if (!framebuffer_addr) {
return {};
}
std::scoped_lock lock{texture_cache.mutex};
const auto [image_view, scaled] =
texture_cache.TryFindFramebufferImageView(config, framebuffer_addr);
if (!image_view) {
return {};
}
query_cache.NotifySegment(false);
const auto& resolution = Settings::values.resolution_info;
FramebufferTextureInfo info{};
info.image = image_view->ImageHandle();
info.image_view = image_view->Handle(Shader::TextureType::Color2D);
info.width = image_view->size.width;
info.height = image_view->size.height;
info.scaled_width = scaled ? resolution.ScaleUp(info.width) : info.width;
info.scaled_height = scaled ? resolution.ScaleUp(info.height) : info.height;
return info;
}
void RasterizerVulkan::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
pipeline_cache.LoadDiskResources(title_id, stop_loading, callback);
}
void RasterizerVulkan::FlushWork() {
#ifdef ANDROID
static constexpr u32 DRAWS_TO_DISPATCH = 1024;
#else
static constexpr u32 DRAWS_TO_DISPATCH = 4096;
#endif // ANDROID
// Only check multiples of 8 draws
static_assert(DRAWS_TO_DISPATCH % 8 == 0);
if ((++draw_counter & 7) != 7) {
return;
}
if (draw_counter < DRAWS_TO_DISPATCH) {
// Send recorded tasks to the worker thread
scheduler.DispatchWork();
return;
}
// Otherwise (every certain number of draws) flush execution.
// This submits commands to the Vulkan driver.
scheduler.Flush();
draw_counter = 0;
}
AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_, TextureCache& texture_cache_,
Scheduler& scheduler_)
: buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, scheduler{scheduler_} {}
bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) {
std::scoped_lock lock{buffer_cache.mutex};
return buffer_cache.DMAClear(src_address, amount, value);
}
bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
std::scoped_lock lock{buffer_cache.mutex};
return buffer_cache.DMACopy(src_address, dest_address, amount);
}
template <bool IS_IMAGE_UPLOAD>
bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
const Tegra::DMA::BufferOperand& buffer_operand,
const Tegra::DMA::ImageOperand& image_operand) {
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
const auto image_id = texture_cache.DmaImageId(image_operand, IS_IMAGE_UPLOAD);
if (image_id == VideoCommon::NULL_IMAGE_ID) {
return false;
}
const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height);
static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing
: VideoCommon::ObtainBufferOperation::MarkAsWritten;
const auto [buffer, offset] =
buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op);
const auto [image, copy] = texture_cache.DmaBufferImageCopy(
copy_info, buffer_operand, image_operand, image_id, IS_IMAGE_UPLOAD);
const std::span copy_span{©, 1};
if constexpr (IS_IMAGE_UPLOAD) {
texture_cache.PrepareImage(image_id, true, false);
image->UploadMemory(buffer->Handle(), offset, copy_span);
} else {
if (offset % BytesPerBlock(image->info.format)) {
return false;
}
texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span,
buffer_operand.address, buffer_size);
}
return true;
}
bool AccelerateDMA::ImageToBuffer(const Tegra::DMA::ImageCopy& copy_info,
const Tegra::DMA::ImageOperand& image_operand,
const Tegra::DMA::BufferOperand& buffer_operand) {
return DmaBufferImageCopy<false>(copy_info, buffer_operand, image_operand);
}
bool AccelerateDMA::BufferToImage(const Tegra::DMA::ImageCopy& copy_info,
const Tegra::DMA::BufferOperand& buffer_operand,
const Tegra::DMA::ImageOperand& image_operand) {
return DmaBufferImageCopy<true>(copy_info, buffer_operand, image_operand);
}
void RasterizerVulkan::UpdateDynamicStates() {
auto& regs = maxwell3d->regs;
UpdateViewportsState(regs);
UpdateScissorsState(regs);
UpdateDepthBias(regs);
UpdateBlendConstants(regs);
UpdateDepthBounds(regs);
UpdateStencilFaces(regs);
UpdateLineWidth(regs);
if (device.IsExtExtendedDynamicStateSupported()) {
UpdateCullMode(regs);
UpdateDepthCompareOp(regs);
UpdateFrontFace(regs);
UpdateStencilOp(regs);
if (state_tracker.TouchStateEnable()) {
UpdateDepthBoundsTestEnable(regs);
UpdateDepthTestEnable(regs);
UpdateDepthWriteEnable(regs);
UpdateStencilTestEnable(regs);
if (device.IsExtExtendedDynamicState2Supported()) {
UpdatePrimitiveRestartEnable(regs);
UpdateRasterizerDiscardEnable(regs);
UpdateDepthBiasEnable(regs);
}
if (device.IsExtExtendedDynamicState3EnablesSupported()) {
UpdateLogicOpEnable(regs);
UpdateDepthClampEnable(regs);
}
}
if (device.IsExtExtendedDynamicState2ExtrasSupported()) {
UpdateLogicOp(regs);
}
if (device.IsExtExtendedDynamicState3Supported()) {
UpdateBlending(regs);
}
}
if (device.IsExtVertexInputDynamicStateSupported()) {
UpdateVertexInput(regs);
}
}
void RasterizerVulkan::HandleTransformFeedback() {
static std::once_flag warn_unsupported;
const auto& regs = maxwell3d->regs;
if (!device.IsExtTransformFeedbackSupported()) {
std::call_once(warn_unsupported, [&] {
LOG_ERROR(Render_Vulkan, "Transform feedbacks used but not supported");
});
return;
}
query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount,
regs.transform_feedback_enabled);
if (regs.transform_feedback_enabled != 0) {
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation));
}
}
void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchViewports()) {
return;
}
if (!regs.viewport_scale_offset_enabled) {
const auto x = static_cast<float>(regs.surface_clip.x);
const auto y = static_cast<float>(regs.surface_clip.y);
const auto width = static_cast<float>(regs.surface_clip.width);
const auto height = static_cast<float>(regs.surface_clip.height);
VkViewport viewport{
.x = x,
.y = y,
.width = width != 0.0f ? width : 1.0f,
.height = height != 0.0f ? height : 1.0f,
.minDepth = 0.0f,
.maxDepth = 1.0f,
};
scheduler.Record([viewport](vk::CommandBuffer cmdbuf) { cmdbuf.SetViewport(0, viewport); });
return;
}
const bool is_rescaling{texture_cache.IsRescaling()};
const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f;
const std::array viewport_list{
GetViewportState(device, regs, 0, scale), GetViewportState(device, regs, 1, scale),
GetViewportState(device, regs, 2, scale), GetViewportState(device, regs, 3, scale),
GetViewportState(device, regs, 4, scale), GetViewportState(device, regs, 5, scale),
GetViewportState(device, regs, 6, scale), GetViewportState(device, regs, 7, scale),
GetViewportState(device, regs, 8, scale), GetViewportState(device, regs, 9, scale),
GetViewportState(device, regs, 10, scale), GetViewportState(device, regs, 11, scale),
GetViewportState(device, regs, 12, scale), GetViewportState(device, regs, 13, scale),
GetViewportState(device, regs, 14, scale), GetViewportState(device, regs, 15, scale),
};
scheduler.Record([this, viewport_list](vk::CommandBuffer cmdbuf) {
const u32 num_viewports = std::min<u32>(device.GetMaxViewports(), Maxwell::NumViewports);
const vk::Span<VkViewport> viewports(viewport_list.data(), num_viewports);
cmdbuf.SetViewport(0, viewports);
});
}
void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchScissors()) {
return;
}
if (!regs.viewport_scale_offset_enabled) {
const auto x = static_cast<float>(regs.surface_clip.x);
const auto y = static_cast<float>(regs.surface_clip.y);
const auto width = static_cast<float>(regs.surface_clip.width);
const auto height = static_cast<float>(regs.surface_clip.height);
VkRect2D scissor;
scissor.offset.x = static_cast<u32>(x);
scissor.offset.y = static_cast<u32>(y);
scissor.extent.width = static_cast<u32>(width != 0.0f ? width : 1.0f);
scissor.extent.height = static_cast<u32>(height != 0.0f ? height : 1.0f);
scheduler.Record([scissor](vk::CommandBuffer cmdbuf) { cmdbuf.SetScissor(0, scissor); });
return;
}
u32 up_scale = 1;
u32 down_shift = 0;
if (texture_cache.IsRescaling()) {
up_scale = Settings::values.resolution_info.up_scale;
down_shift = Settings::values.resolution_info.down_shift;
}
const std::array scissor_list{
GetScissorState(regs, 0, up_scale, down_shift),
GetScissorState(regs, 1, up_scale, down_shift),
GetScissorState(regs, 2, up_scale, down_shift),
GetScissorState(regs, 3, up_scale, down_shift),
GetScissorState(regs, 4, up_scale, down_shift),
GetScissorState(regs, 5, up_scale, down_shift),
GetScissorState(regs, 6, up_scale, down_shift),
GetScissorState(regs, 7, up_scale, down_shift),
GetScissorState(regs, 8, up_scale, down_shift),
GetScissorState(regs, 9, up_scale, down_shift),
GetScissorState(regs, 10, up_scale, down_shift),
GetScissorState(regs, 11, up_scale, down_shift),
GetScissorState(regs, 12, up_scale, down_shift),
GetScissorState(regs, 13, up_scale, down_shift),
GetScissorState(regs, 14, up_scale, down_shift),
GetScissorState(regs, 15, up_scale, down_shift),
};
scheduler.Record([this, scissor_list](vk::CommandBuffer cmdbuf) {
const u32 num_scissors = std::min<u32>(device.GetMaxViewports(), Maxwell::NumViewports);
const vk::Span<VkRect2D> scissors(scissor_list.data(), num_scissors);
cmdbuf.SetScissor(0, scissors);
});
}
void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthBias()) {
return;
}
float units = regs.depth_bias / 2.0f;
const bool is_d24 = regs.zeta.format == Tegra::DepthFormat::Z24_UNORM_S8_UINT ||
regs.zeta.format == Tegra::DepthFormat::X8Z24_UNORM ||
regs.zeta.format == Tegra::DepthFormat::S8Z24_UNORM ||
regs.zeta.format == Tegra::DepthFormat::V8Z24_UNORM;
if (is_d24 && !device.SupportsD24DepthBuffer() && program_id == 0x1006A800016E000ULL) {
// Only activate this in Super Smash Brothers Ultimate
// the base formulas can be obtained from here:
// https://docs.microsoft.com/en-us/windows/win32/direct3d11/d3d10-graphics-programming-guide-output-merger-stage-depth-bias
const double rescale_factor =
static_cast<double>(1ULL << (32 - 24)) / (static_cast<double>(0x1.ep+127));
units = static_cast<float>(static_cast<double>(units) * rescale_factor);
}
scheduler.Record([constant = units, clamp = regs.depth_bias_clamp,
factor = regs.slope_scale_depth_bias](vk::CommandBuffer cmdbuf) {
cmdbuf.SetDepthBias(constant, clamp, factor);
});
}
void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchBlendConstants()) {
return;
}
const std::array blend_color = {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b,
regs.blend_color.a};
scheduler.Record(
[blend_color](vk::CommandBuffer cmdbuf) { cmdbuf.SetBlendConstants(blend_color.data()); });
}
void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthBounds()) {
return;
}
scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]](
vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBounds(min, max); });
}
void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchStencilProperties()) {
return;
}
bool update_references = state_tracker.TouchStencilReference();
bool update_write_mask = state_tracker.TouchStencilWriteMask();
bool update_compare_masks = state_tracker.TouchStencilCompare();
if (state_tracker.TouchStencilSide(regs.stencil_two_side_enable != 0)) {
update_references = true;
update_write_mask = true;
update_compare_masks = true;
}
if (update_references) {
[&]() {
if (regs.stencil_two_side_enable) {
if (!state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref) &&
!state_tracker.CheckStencilReferenceBack(regs.stencil_back_ref)) {
return;
}
} else {
if (!state_tracker.CheckStencilReferenceFront(regs.stencil_front_ref)) {
return;
}
}
scheduler.Record([front_ref = regs.stencil_front_ref, back_ref = regs.stencil_back_ref,
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
const bool set_back = two_sided && front_ref != back_ref;
// Front face
cmdbuf.SetStencilReference(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_ref);
if (set_back) {
cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref);
}
});
}();
}
if (update_write_mask) {
[&]() {
if (regs.stencil_two_side_enable) {
if (!state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask) &&
!state_tracker.CheckStencilWriteMaskBack(regs.stencil_back_mask)) {
return;
}
} else {
if (!state_tracker.CheckStencilWriteMaskFront(regs.stencil_front_mask)) {
return;
}
}
scheduler.Record([front_write_mask = regs.stencil_front_mask,
back_write_mask = regs.stencil_back_mask,
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
const bool set_back = two_sided && front_write_mask != back_write_mask;
// Front face
cmdbuf.SetStencilWriteMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_write_mask);
if (set_back) {
cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask);
}
});
}();
}
if (update_compare_masks) {
[&]() {
if (regs.stencil_two_side_enable) {
if (!state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask) &&
!state_tracker.CheckStencilCompareMaskBack(regs.stencil_back_func_mask)) {
return;
}
} else {
if (!state_tracker.CheckStencilCompareMaskFront(regs.stencil_front_func_mask)) {
return;
}
}
scheduler.Record([front_test_mask = regs.stencil_front_func_mask,
back_test_mask = regs.stencil_back_func_mask,
two_sided = regs.stencil_two_side_enable](vk::CommandBuffer cmdbuf) {
const bool set_back = two_sided && front_test_mask != back_test_mask;
// Front face
cmdbuf.SetStencilCompareMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
: VK_STENCIL_FACE_FRONT_AND_BACK,
front_test_mask);
if (set_back) {
cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask);
}
});
}();
}
state_tracker.ClearStencilReset();
}
void RasterizerVulkan::UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchLineWidth()) {
return;
}
const float width =
regs.line_anti_alias_enable ? regs.line_width_smooth : regs.line_width_aliased;
scheduler.Record([width](vk::CommandBuffer cmdbuf) { cmdbuf.SetLineWidth(width); });
}
void RasterizerVulkan::UpdateCullMode(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchCullMode()) {
return;
}
scheduler.Record([enabled = regs.gl_cull_test_enabled,
cull_face = regs.gl_cull_face](vk::CommandBuffer cmdbuf) {
cmdbuf.SetCullModeEXT(enabled ? MaxwellToVK::CullFace(cull_face) : VK_CULL_MODE_NONE);
});
}
void RasterizerVulkan::UpdateDepthBoundsTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthBoundsTestEnable()) {
return;
}
bool enabled = regs.depth_bounds_enable;
if (enabled && !device.IsDepthBoundsSupported()) {
LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported");
enabled = false;
}
scheduler.Record([enable = enabled](vk::CommandBuffer cmdbuf) {
cmdbuf.SetDepthBoundsTestEnableEXT(enable);
});
}
void RasterizerVulkan::UpdateDepthTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthTestEnable()) {
return;
}
scheduler.Record([enable = regs.depth_test_enable](vk::CommandBuffer cmdbuf) {
cmdbuf.SetDepthTestEnableEXT(enable);
});
}
void RasterizerVulkan::UpdateDepthWriteEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthWriteEnable()) {
return;
}
scheduler.Record([enable = regs.depth_write_enabled](vk::CommandBuffer cmdbuf) {
cmdbuf.SetDepthWriteEnableEXT(enable);
});
}
void RasterizerVulkan::UpdatePrimitiveRestartEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchPrimitiveRestartEnable()) {
return;
}
scheduler.Record([enable = regs.primitive_restart.enabled](vk::CommandBuffer cmdbuf) {
cmdbuf.SetPrimitiveRestartEnableEXT(enable);
});
}
void RasterizerVulkan::UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchRasterizerDiscardEnable()) {
return;
}
scheduler.Record([disable = regs.rasterize_enable](vk::CommandBuffer cmdbuf) {
cmdbuf.SetRasterizerDiscardEnableEXT(disable == 0);
});
}
void RasterizerVulkan::UpdateDepthBiasEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthBiasEnable()) {
return;
}
constexpr size_t POINT = 0;
constexpr size_t LINE = 1;
constexpr size_t POLYGON = 2;
static constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
POINT, // Points
LINE, // Lines
LINE, // LineLoop
LINE, // LineStrip
POLYGON, // Triangles
POLYGON, // TriangleStrip
POLYGON, // TriangleFan
POLYGON, // Quads
POLYGON, // QuadStrip
POLYGON, // Polygon
LINE, // LinesAdjacency
LINE, // LineStripAdjacency
POLYGON, // TrianglesAdjacency
POLYGON, // TriangleStripAdjacency
POLYGON, // Patches
};
const std::array enabled_lut{
regs.polygon_offset_point_enable,
regs.polygon_offset_line_enable,
regs.polygon_offset_fill_enable,
};
const u32 topology_index = static_cast<u32>(maxwell3d->draw_manager->GetDrawState().topology);
const u32 enable = enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]];
scheduler.Record(
[enable](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthBiasEnableEXT(enable != 0); });
}
void RasterizerVulkan::UpdateLogicOpEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchLogicOpEnable()) {
return;
}
scheduler.Record([enable = regs.logic_op.enable](vk::CommandBuffer cmdbuf) {
cmdbuf.SetLogicOpEnableEXT(enable != 0);
});
}
void RasterizerVulkan::UpdateDepthClampEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthClampEnable()) {
return;
}
bool is_enabled = !(regs.viewport_clip_control.geometry_clip ==
Maxwell::ViewportClipControl::GeometryClip::Passthrough ||
regs.viewport_clip_control.geometry_clip ==
Maxwell::ViewportClipControl::GeometryClip::FrustumXYZ ||
regs.viewport_clip_control.geometry_clip ==
Maxwell::ViewportClipControl::GeometryClip::FrustumZ);
scheduler.Record(
[is_enabled](vk::CommandBuffer cmdbuf) { cmdbuf.SetDepthClampEnableEXT(is_enabled); });
}
void RasterizerVulkan::UpdateDepthCompareOp(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchDepthCompareOp()) {
return;
}
scheduler.Record([func = regs.depth_test_func](vk::CommandBuffer cmdbuf) {
cmdbuf.SetDepthCompareOpEXT(MaxwellToVK::ComparisonOp(func));
});
}
void RasterizerVulkan::UpdateFrontFace(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchFrontFace()) {
return;
}
VkFrontFace front_face = MaxwellToVK::FrontFace(regs.gl_front_face);
if (regs.window_origin.flip_y != 0) {
front_face = front_face == VK_FRONT_FACE_CLOCKWISE ? VK_FRONT_FACE_COUNTER_CLOCKWISE
: VK_FRONT_FACE_CLOCKWISE;
}
scheduler.Record(
[front_face](vk::CommandBuffer cmdbuf) { cmdbuf.SetFrontFaceEXT(front_face); });
}
void RasterizerVulkan::UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchStencilOp()) {
return;
}
const Maxwell::StencilOp::Op fail = regs.stencil_front_op.fail;
const Maxwell::StencilOp::Op zfail = regs.stencil_front_op.zfail;
const Maxwell::StencilOp::Op zpass = regs.stencil_front_op.zpass;
const Maxwell::ComparisonOp compare = regs.stencil_front_op.func;
if (regs.stencil_two_side_enable) {
// Separate stencil op per face
const Maxwell::StencilOp::Op back_fail = regs.stencil_back_op.fail;
const Maxwell::StencilOp::Op back_zfail = regs.stencil_back_op.zfail;
const Maxwell::StencilOp::Op back_zpass = regs.stencil_back_op.zpass;
const Maxwell::ComparisonOp back_compare = regs.stencil_back_op.func;
scheduler.Record([fail, zfail, zpass, compare, back_fail, back_zfail, back_zpass,
back_compare](vk::CommandBuffer cmdbuf) {
cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_BIT, MaxwellToVK::StencilOp(fail),
MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail),
MaxwellToVK::ComparisonOp(compare));
cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_BACK_BIT, MaxwellToVK::StencilOp(back_fail),
MaxwellToVK::StencilOp(back_zpass),
MaxwellToVK::StencilOp(back_zfail),
MaxwellToVK::ComparisonOp(back_compare));
});
} else {
// Front face defines the stencil op of both faces
scheduler.Record([fail, zfail, zpass, compare](vk::CommandBuffer cmdbuf) {
cmdbuf.SetStencilOpEXT(VK_STENCIL_FACE_FRONT_AND_BACK, MaxwellToVK::StencilOp(fail),
MaxwellToVK::StencilOp(zpass), MaxwellToVK::StencilOp(zfail),
MaxwellToVK::ComparisonOp(compare));
});
}
}
void RasterizerVulkan::UpdateLogicOp(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchLogicOp()) {
return;
}
const auto op_value = static_cast<u32>(regs.logic_op.op);
auto op = op_value >= 0x1500 && op_value < 0x1510 ? static_cast<VkLogicOp>(op_value - 0x1500)
: VK_LOGIC_OP_NO_OP;
scheduler.Record([op](vk::CommandBuffer cmdbuf) { cmdbuf.SetLogicOpEXT(op); });
}
void RasterizerVulkan::UpdateBlending(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchBlending()) {
return;
}
if (state_tracker.TouchColorMask()) {
std::array<VkColorComponentFlags, Maxwell::NumRenderTargets> setup_masks{};
for (size_t index = 0; index < Maxwell::NumRenderTargets; index++) {
const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index];
auto& current = setup_masks[index];
if (mask.R) {
current |= VK_COLOR_COMPONENT_R_BIT;
}
if (mask.G) {
current |= VK_COLOR_COMPONENT_G_BIT;
}
if (mask.B) {
current |= VK_COLOR_COMPONENT_B_BIT;
}
if (mask.A) {
current |= VK_COLOR_COMPONENT_A_BIT;
}
}
scheduler.Record([setup_masks](vk::CommandBuffer cmdbuf) {
cmdbuf.SetColorWriteMaskEXT(0, setup_masks);
});
}
if (state_tracker.TouchBlendEnable()) {
std::array<VkBool32, Maxwell::NumRenderTargets> setup_enables{};
std::ranges::transform(
regs.blend.enable, setup_enables.begin(),
[&](const auto& is_enabled) { return is_enabled != 0 ? VK_TRUE : VK_FALSE; });
scheduler.Record([setup_enables](vk::CommandBuffer cmdbuf) {
cmdbuf.SetColorBlendEnableEXT(0, setup_enables);
});
}
if (state_tracker.TouchBlendEquations()) {
std::array<VkColorBlendEquationEXT, Maxwell::NumRenderTargets> setup_blends{};
for (size_t index = 0; index < Maxwell::NumRenderTargets; index++) {
const auto blend_setup = [&]<typename T>(const T& guest_blend) {
auto& host_blend = setup_blends[index];
host_blend.srcColorBlendFactor = MaxwellToVK::BlendFactor(guest_blend.color_source);
host_blend.dstColorBlendFactor = MaxwellToVK::BlendFactor(guest_blend.color_dest);
host_blend.colorBlendOp = MaxwellToVK::BlendEquation(guest_blend.color_op);
host_blend.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(guest_blend.alpha_source);
host_blend.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(guest_blend.alpha_dest);
host_blend.alphaBlendOp = MaxwellToVK::BlendEquation(guest_blend.alpha_op);
};
if (!regs.blend_per_target_enabled) {
blend_setup(regs.blend);
continue;
}
blend_setup(regs.blend_per_target[index]);
}
scheduler.Record([setup_blends](vk::CommandBuffer cmdbuf) {
cmdbuf.SetColorBlendEquationEXT(0, setup_blends);
});
}
}
void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchStencilTestEnable()) {
return;
}
scheduler.Record([enable = regs.stencil_enable](vk::CommandBuffer cmdbuf) {
cmdbuf.SetStencilTestEnableEXT(enable);
});
}
void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) {
auto& dirty{maxwell3d->dirty.flags};
if (!dirty[Dirty::VertexInput]) {
return;
}
dirty[Dirty::VertexInput] = false;
boost::container::static_vector<VkVertexInputBindingDescription2EXT, 32> bindings;
boost::container::static_vector<VkVertexInputAttributeDescription2EXT, 32> attributes;
// There seems to be a bug on Nvidia's driver where updating only higher attributes ends up
// generating dirty state. Track the highest dirty attribute and update all attributes until
// that one.
size_t highest_dirty_attr{};
for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
if (dirty[Dirty::VertexAttribute0 + index]) {
highest_dirty_attr = index;
}
}
for (size_t index = 0; index < highest_dirty_attr; ++index) {
const Maxwell::VertexAttribute attribute{regs.vertex_attrib_format[index]};
const u32 binding{attribute.buffer};
dirty[Dirty::VertexAttribute0 + index] = false;
dirty[Dirty::VertexBinding0 + static_cast<size_t>(binding)] = true;
if (!attribute.constant) {
attributes.push_back({
.sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT,
.pNext = nullptr,
.location = static_cast<u32>(index),
.binding = binding,
.format = MaxwellToVK::VertexFormat(device, attribute.type, attribute.size),
.offset = attribute.offset,
});
}
}
for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) {
if (!dirty[Dirty::VertexBinding0 + index]) {
continue;
}
dirty[Dirty::VertexBinding0 + index] = false;
const u32 binding{static_cast<u32>(index)};
const auto& input_binding{regs.vertex_streams[binding]};
const bool is_instanced{regs.vertex_stream_instances.IsInstancingEnabled(binding)};
bindings.push_back({
.sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_BINDING_DESCRIPTION_2_EXT,
.pNext = nullptr,
.binding = binding,
.stride = input_binding.stride,
.inputRate = is_instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX,
.divisor = is_instanced ? input_binding.frequency : 1,
});
}
scheduler.Record([bindings, attributes](vk::CommandBuffer cmdbuf) {
cmdbuf.SetVertexInputEXT(bindings, attributes);
});
}
void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) {
CreateChannel(channel);
{
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.CreateChannel(channel);
buffer_cache.CreateChannel(channel);
}
pipeline_cache.CreateChannel(channel);
query_cache.CreateChannel(channel);
state_tracker.SetupTables(channel);
}
void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) {
const s32 channel_id = channel.bind_id;
BindToChannel(channel_id);
{
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.BindToChannel(channel_id);
buffer_cache.BindToChannel(channel_id);
}
pipeline_cache.BindToChannel(channel_id);
query_cache.BindToChannel(channel_id);
state_tracker.ChangeChannel(channel);
state_tracker.InvalidateState();
}
void RasterizerVulkan::ReleaseChannel(s32 channel_id) {
EraseChannel(channel_id);
{
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.EraseChannel(channel_id);
buffer_cache.EraseChannel(channel_id);
}
pipeline_cache.EraseChannel(channel_id);
query_cache.EraseChannel(channel_id);
}
} // namespace Vulkan