// SPDX-FileCopyrightText: 2015 Citra Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <algorithm>
#include <array>
#include <bitset>
#include <cmath>
#include <functional>
#include <memory>
#include <mutex>
#include <optional>
#include <span>
#include <string_view>
#include <utility>
#include <glad/glad.h>
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/math_util.h"
#include "common/microprofile.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "video_core/control/channel_state.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/shader_cache.h"
#include "video_core/texture_cache/texture_cache_base.h"
namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using GLvec4 = std::array<GLfloat, 4>;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Clears, "OpenGL", "Clears", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Management", MP_RGB(100, 255, 100));
namespace {
constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
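// Calls glEnable or glDisable for a capability depending on the requested state.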
void oglEnable(GLenum cap, bool state) {
(state ? glEnable : glDisable)(cap);
}
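// Maps a VideoCommon query type onto the subset of query types implemented by the OpenGL
// query cache; returns std::nullopt when there is no host-side counterpart.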
std::optional<VideoCore::QueryType> MaxwellToVideoCoreQuery(VideoCommon::QueryType type) {
switch (type) {
case VideoCommon::QueryType::PrimitivesGenerated:
case VideoCommon::QueryType::VtgPrimitivesOut:
return VideoCore::QueryType::PrimitivesGenerated;
case VideoCommon::QueryType::ZPassPixelCount64:
return VideoCore::QueryType::SamplesPassed;
case VideoCommon::QueryType::StreamingPrimitivesSucceeded:
// case VideoCommon::QueryType::StreamingByteCount:
// TODO: StreamingByteCount = StreamingPrimitivesSucceeded * num_verts * vert_stride
return VideoCore::QueryType::TfbPrimitivesWritten;
default:
return std::nullopt;
}
}
} // Anonymous namespace
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
Tegra::MaxwellDeviceMemoryManager& device_memory_,
const Device& device_, ProgramManager& program_manager_,
StateTracker& state_tracker_)
: gpu(gpu_), device_memory(device_memory_), device(device_), program_manager(program_manager_),
state_tracker(state_tracker_),
texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool),
texture_cache(texture_cache_runtime, device_memory_),
buffer_cache_runtime(device, staging_buffer_pool),
buffer_cache(device_memory_, buffer_cache_runtime),
shader_cache(device_memory_, emu_window_, device, texture_cache, buffer_cache,
program_manager, state_tracker, gpu.ShaderNotify()),
query_cache(*this, device_memory_), accelerate_dma(buffer_cache, texture_cache),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
blit_image(program_manager_) {}
RasterizerOpenGL::~RasterizerOpenGL() = default;
void RasterizerOpenGL::SyncVertexFormats() {
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::VertexFormats]) {
return;
}
flags[Dirty::VertexFormats] = false;
    // Use the vertex array as-is; this assumes the data is already formatted correctly for
    // OpenGL. Always enable the first 16 vertex attributes, as we don't know which ones are
    // actually used until shader time. Note that Tegra technically supports 32, but we cap
    // this at 16 for now to avoid OpenGL errors.
// TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
// assume every shader uses them all.
for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
if (!flags[Dirty::VertexFormat0 + index]) {
continue;
}
flags[Dirty::VertexFormat0 + index] = false;
const auto& attrib = maxwell3d->regs.vertex_attrib_format[index];
const auto gl_index = static_cast<GLuint>(index);
// Disable constant attributes.
if (attrib.constant) {
glDisableVertexAttribArray(gl_index);
continue;
}
glEnableVertexAttribArray(gl_index);
if (attrib.type == Maxwell::VertexAttribute::Type::SInt ||
attrib.type == Maxwell::VertexAttribute::Type::UInt) {
glVertexAttribIFormat(gl_index, attrib.ComponentCount(),
MaxwellToGL::VertexFormat(attrib), attrib.offset);
} else {
glVertexAttribFormat(gl_index, attrib.ComponentCount(),
MaxwellToGL::VertexFormat(attrib),
attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
}
glVertexAttribBinding(gl_index, attrib.buffer);
}
}
void RasterizerOpenGL::SyncVertexInstances() {
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::VertexInstances]) {
return;
}
flags[Dirty::VertexInstances] = false;
const auto& regs = maxwell3d->regs;
for (std::size_t index = 0; index < NUM_SUPPORTED_VERTEX_ATTRIBUTES; ++index) {
if (!flags[Dirty::VertexInstance0 + index]) {
continue;
}
flags[Dirty::VertexInstance0 + index] = false;
const auto gl_index = static_cast<GLuint>(index);
const bool instancing_enabled = regs.vertex_stream_instances.IsInstancingEnabled(gl_index);
const GLuint divisor = instancing_enabled ? regs.vertex_streams[index].frequency : 0;
glVertexBindingDivisor(gl_index, divisor);
}
}
void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
shader_cache.LoadDiskResources(title_id, stop_loading, callback);
}
void RasterizerOpenGL::Clear(u32 layer_count) {
MICROPROFILE_SCOPE(OpenGL_Clears);
gpu_memory->FlushCaching();
const auto& regs = maxwell3d->regs;
bool use_color{};
bool use_depth{};
bool use_stencil{};
if (regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B ||
regs.clear_surface.A) {
use_color = true;
const GLuint index = regs.clear_surface.RT;
state_tracker.NotifyColorMask(index);
glColorMaski(index, regs.clear_surface.R != 0, regs.clear_surface.G != 0,
regs.clear_surface.B != 0, regs.clear_surface.A != 0);
// TODO(Rodrigo): Determine if clamping is used on clears
SyncFragmentColorClampState();
SyncFramebufferSRGB();
}
if (regs.clear_surface.Z) {
        if (regs.zeta_enable == 0) {
LOG_DEBUG(Render_OpenGL, "Tried to clear Z but buffer is not enabled!");
}
use_depth = true;
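        // Always enable depth writes when clearing the depth buffer. The guest ignores the
        // depth write mask while clearing, but OpenGL honors it, so force it to true here.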
state_tracker.NotifyDepthMask();
glDepthMask(GL_TRUE);
}
if (regs.clear_surface.S) {
        if (regs.zeta_enable == 0) {
LOG_DEBUG(Render_OpenGL, "Tried to clear stencil but buffer is not enabled!");
}
use_stencil = true;
}
if (!use_color && !use_depth && !use_stencil) {
        // Neither a color surface nor a depth/stencil surface is enabled
return;
}
SyncRasterizeEnable();
SyncStencilTestState();
std::scoped_lock lock{texture_cache.mutex};
texture_cache.UpdateRenderTargets(true);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
SyncViewport();
if (regs.clear_control.use_scissor) {
SyncScissorTest();
} else {
state_tracker.NotifyScissor0();
glDisablei(GL_SCISSOR_TEST, 0);
}
UNIMPLEMENTED_IF(regs.clear_control.use_viewport_clip0);
if (use_color) {
glClearBufferfv(GL_COLOR, regs.clear_surface.RT, regs.clear_color.data());
}
if (use_depth && use_stencil) {
glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
} else if (use_depth) {
        glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
} else if (use_stencil) {
        glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
}
++num_queued_commands;
}
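// Common prologue/epilogue for every draw path: fetches the current graphics pipeline,
// synchronizes dirty fixed-function state and transform feedback, and invokes draw_func with
// the translated primitive topology.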
template <typename Func>
void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
SCOPE_EXIT {
gpu.TickWork();
};
gpu_memory->FlushCaching();
GraphicsPipeline* const pipeline{shader_cache.CurrentGraphicsPipeline()};
if (!pipeline) {
return;
}
gpu.TickWork();
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
if (pipeline->UsesLocalMemory()) {
program_manager.LocalMemoryWarmup();
}
pipeline->SetEngine(maxwell3d, gpu_memory);
pipeline->Configure(is_indexed);
SyncState();
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(draw_state.topology);
BeginTransformFeedback(pipeline, primitive_mode);
draw_func(primitive_mode);
EndTransformFeedback();
++num_queued_commands;
has_written_global_memory |= pipeline->WritesGlobalMemory();
}
void RasterizerOpenGL::Draw(bool is_indexed, u32 instance_count) {
PrepareDraw(is_indexed, [this, is_indexed, instance_count](GLenum primitive_mode) {
const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
const GLuint base_instance = static_cast<GLuint>(draw_state.base_instance);
const GLsizei num_instances = static_cast<GLsizei>(instance_count);
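        // Pick the most specific glDraw* entry point for the current base vertex, base
        // instance and instance count; the simpler variants tend to be cheaper on some drivers.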
if (is_indexed) {
const GLint base_vertex = static_cast<GLint>(draw_state.base_index);
const GLsizei num_vertices = static_cast<GLsizei>(draw_state.index_buffer.count);
const GLvoid* const offset = buffer_cache_runtime.IndexOffset();
const GLenum format = MaxwellToGL::IndexFormat(draw_state.index_buffer.format);
if (num_instances == 1 && base_instance == 0 && base_vertex == 0) {
glDrawElements(primitive_mode, num_vertices, format, offset);
} else if (num_instances == 1 && base_instance == 0) {
glDrawElementsBaseVertex(primitive_mode, num_vertices, format, offset, base_vertex);
} else if (base_vertex == 0 && base_instance == 0) {
glDrawElementsInstanced(primitive_mode, num_vertices, format, offset,
num_instances);
} else if (base_vertex == 0) {
glDrawElementsInstancedBaseInstance(primitive_mode, num_vertices, format, offset,
num_instances, base_instance);
} else if (base_instance == 0) {
glDrawElementsInstancedBaseVertex(primitive_mode, num_vertices, format, offset,
num_instances, base_vertex);
} else {
glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, num_vertices, format,
offset, num_instances, base_vertex,
base_instance);
}
} else {
const GLint base_vertex = static_cast<GLint>(draw_state.vertex_buffer.first);
const GLsizei num_vertices = static_cast<GLsizei>(draw_state.vertex_buffer.count);
if (num_instances == 1 && base_instance == 0) {
glDrawArrays(primitive_mode, base_vertex, num_vertices);
} else if (base_instance == 0) {
glDrawArraysInstanced(primitive_mode, base_vertex, num_vertices, num_instances);
} else {
glDrawArraysInstancedBaseInstance(primitive_mode, base_vertex, num_vertices,
num_instances, base_instance);
}
}
});
}
void RasterizerOpenGL::DrawIndirect() {
const auto& params = maxwell3d->draw_manager->GetIndirectParams();
    buffer_cache.SetDrawIndirect(&params);
    PrepareDraw(params.is_indexed, [this, &params](GLenum primitive_mode) {
if (params.is_byte_count) {
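            // Byte-count draws replay a transform feedback capture; the associated counter is
            // assumed to live 4 bytes before the indirect start address, hence the offset.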
const GPUVAddr tfb_object_base_addr = params.indirect_start_address - 4U;
const GLuint tfb_object =
buffer_cache_runtime.GetTransformFeedbackObject(tfb_object_base_addr);
glDrawTransformFeedback(primitive_mode, tfb_object);
return;
}
const auto [buffer, offset] = buffer_cache.GetDrawIndirectBuffer();
const GLvoid* const gl_offset =
reinterpret_cast<const GLvoid*>(static_cast<uintptr_t>(offset));
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, buffer->Handle());
if (params.include_count) {
const auto [draw_buffer, offset_base] = buffer_cache.GetDrawIndirectCount();
glBindBuffer(GL_PARAMETER_BUFFER, draw_buffer->Handle());
if (params.is_indexed) {
const GLenum format = MaxwellToGL::IndexFormat(maxwell3d->regs.index_buffer.format);
glMultiDrawElementsIndirectCount(primitive_mode, format, gl_offset,
static_cast<GLintptr>(offset_base),
static_cast<GLsizei>(params.max_draw_counts),
static_cast<GLsizei>(params.stride));
} else {
glMultiDrawArraysIndirectCount(primitive_mode, gl_offset,
static_cast<GLintptr>(offset_base),
static_cast<GLsizei>(params.max_draw_counts),
static_cast<GLsizei>(params.stride));
}
return;
}
if (params.is_indexed) {
const GLenum format = MaxwellToGL::IndexFormat(maxwell3d->regs.index_buffer.format);
glMultiDrawElementsIndirect(primitive_mode, format, gl_offset,
static_cast<GLsizei>(params.max_draw_counts),
static_cast<GLsizei>(params.stride));
} else {
glMultiDrawArraysIndirect(primitive_mode, gl_offset,
static_cast<GLsizei>(params.max_draw_counts),
static_cast<GLsizei>(params.stride));
}
});
buffer_cache.SetDrawIndirect(nullptr);
}
void RasterizerOpenGL::DrawTexture() {
MICROPROFILE_SCOPE(OpenGL_Drawing);
SCOPE_EXIT {
gpu.TickWork();
};
texture_cache.SynchronizeGraphicsDescriptors();
texture_cache.UpdateRenderTargets(false);
SyncState();
const auto& draw_texture_state = maxwell3d->draw_manager->GetDrawTextureState();
const auto& sampler = texture_cache.GetGraphicsSampler(draw_texture_state.src_sampler);
const auto& texture = texture_cache.GetImageView(draw_texture_state.src_texture);
const auto Scale = [&](auto dim) -> s32 {
return Settings::values.resolution_info.ScaleUp(static_cast<s32>(dim));
};
Region2D dst_region = {
Offset2D{.x = Scale(draw_texture_state.dst_x0), .y = Scale(draw_texture_state.dst_y0)},
Offset2D{.x = Scale(draw_texture_state.dst_x1), .y = Scale(draw_texture_state.dst_y1)}};
Region2D src_region = {
Offset2D{.x = Scale(draw_texture_state.src_x0), .y = Scale(draw_texture_state.src_y0)},
Offset2D{.x = Scale(draw_texture_state.src_x1), .y = Scale(draw_texture_state.src_y1)}};
Extent3D src_size = {static_cast<u32>(Scale(texture.size.width)),
static_cast<u32>(Scale(texture.size.height)), texture.size.depth};
if (device.HasDrawTexture()) {
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
glDrawTextureNV(texture.DefaultHandle(), sampler->Handle(),
static_cast<f32>(dst_region.start.x), static_cast<f32>(dst_region.start.y),
static_cast<f32>(dst_region.end.x), static_cast<f32>(dst_region.end.y), 0,
draw_texture_state.src_x0 / static_cast<float>(texture.size.width),
draw_texture_state.src_y0 / static_cast<float>(texture.size.height),
draw_texture_state.src_x1 / static_cast<float>(texture.size.width),
draw_texture_state.src_y1 / static_cast<float>(texture.size.height));
} else {
blit_image.BlitColor(texture_cache.GetFramebuffer()->Handle(), texture.DefaultHandle(),
sampler->Handle(), dst_region, src_region, src_size);
state_tracker.InvalidateState();
}
++num_queued_commands;
}
void RasterizerOpenGL::DispatchCompute() {
gpu_memory->FlushCaching();
ComputePipeline* const pipeline{shader_cache.CurrentComputePipeline()};
if (!pipeline) {
return;
}
if (pipeline->UsesLocalMemory()) {
program_manager.LocalMemoryWarmup();
}
pipeline->SetEngine(kepler_compute, gpu_memory);
pipeline->Configure();
const auto& qmd{kepler_compute->launch_description};
auto indirect_address = kepler_compute->GetIndirectComputeAddress();
if (indirect_address) {
// DispatchIndirect
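        // The indirect dispatch arguments are three consecutive u32 values (grid X, Y and Z),
        // hence the 12-byte buffer obtained below.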
static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
const auto post_op = VideoCommon::ObtainBufferOperation::DiscardWrite;
const auto [buffer, offset] =
buffer_cache.ObtainBuffer(*indirect_address, 12, sync_info, post_op);
glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer->Handle());
glDispatchComputeIndirect(static_cast<GLintptr>(offset));
return;
}
glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z);
++num_queued_commands;
has_written_global_memory |= pipeline->WritesGlobalMemory();
}
void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) {
const auto query_cache_type = MaxwellToVideoCoreQuery(type);
if (!query_cache_type.has_value()) {
UNIMPLEMENTED_IF_MSG(type != VideoCommon::QueryType::Payload, "Reset query type: {}", type);
return;
}
query_cache.ResetCounter(*query_cache_type);
}
void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCommon::QueryType type,
VideoCommon::QueryPropertiesFlags flags, u32 payload, u32 subreport) {
const auto query_cache_type = MaxwellToVideoCoreQuery(type);
if (!query_cache_type.has_value()) {
return QueryFallback(gpu_addr, type, flags, payload, subreport);
}
const bool has_timeout = True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout);
const auto timestamp = has_timeout ? std::optional<u64>{gpu.GetTicks()} : std::nullopt;
query_cache.Query(gpu_addr, *query_cache_type, timestamp);
}
void RasterizerOpenGL::QueryFallback(GPUVAddr gpu_addr, VideoCommon::QueryType type,
VideoCommon::QueryPropertiesFlags flags, u32 payload,
u32 subreport) {
if (type != VideoCommon::QueryType::Payload) {
payload = 1u;
}
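    // Semaphore reports with a timestamp span 16 bytes: the payload is written as a u64 at
    // gpu_addr and the GPU tick count at gpu_addr + 8. Short reports are a single u32 payload.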
std::function<void()> func([this, gpu_addr, flags, memory_manager = gpu_memory, payload]() {
if (True(flags & VideoCommon::QueryPropertiesFlags::HasTimeout)) {
u64 ticks = gpu.GetTicks();
memory_manager->Write<u64>(gpu_addr + 8, ticks);
memory_manager->Write<u64>(gpu_addr, static_cast<u64>(payload));
} else {
memory_manager->Write<u32>(gpu_addr, payload);
}
});
if (True(flags & VideoCommon::QueryPropertiesFlags::IsAFence)) {
SignalFence(std::move(func));
return;
}
func();
}
void RasterizerOpenGL::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
u32 size) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.BindGraphicsUniformBuffer(stage, index, gpu_addr, size);
}
void RasterizerOpenGL::DisableGraphicsUniformBuffer(size_t stage, u32 index) {
buffer_cache.DisableGraphicsUniformBuffer(stage, index);
}
void RasterizerOpenGL::FlushAll() {}
void RasterizerOpenGL::FlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (addr == 0 || size == 0) {
return;
}
if (True(which & VideoCommon::CacheType::TextureCache)) {
std::scoped_lock lock{texture_cache.mutex};
texture_cache.DownloadMemory(addr, size);
}
    if (True(which & VideoCommon::CacheType::BufferCache)) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.DownloadMemory(addr, size);
}
    if (True(which & VideoCommon::CacheType::QueryCache)) {
query_cache.FlushRegion(addr, size);
}
}
bool RasterizerOpenGL::MustFlushRegion(DAddr addr, u64 size, VideoCommon::CacheType which) {
    if (True(which & VideoCommon::CacheType::BufferCache)) {
std::scoped_lock lock{buffer_cache.mutex};
if (buffer_cache.IsRegionGpuModified(addr, size)) {
return true;
}
}
if (!Settings::IsGPULevelHigh()) {
return false;
}
if (True(which & VideoCommon::CacheType::TextureCache)) {
std::scoped_lock lock{texture_cache.mutex};
return texture_cache.IsRegionGpuModified(addr, size);
}
return false;
}
VideoCore::RasterizerDownloadArea RasterizerOpenGL::GetFlushArea(DAddr addr, u64 size) {
{
std::scoped_lock lock{texture_cache.mutex};
auto area = texture_cache.GetFlushArea(addr, size);
if (area) {
return *area;
}
}
{
std::scoped_lock lock{buffer_cache.mutex};
auto area = buffer_cache.GetFlushArea(addr, size);
if (area) {
return *area;
}
}
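    // Neither cache tracks this region, so fall back to a device-page-aligned area marked as
    // preemptive, letting the caller handle it conservatively.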
VideoCore::RasterizerDownloadArea new_area{
.start_address = Common::AlignDown(addr, Core::DEVICE_PAGESIZE),
.end_address = Common::AlignUp(addr + size, Core::DEVICE_PAGESIZE),
.preemtive = true,
};
return new_area;
}
void RasterizerOpenGL::InvalidateRegion(DAddr addr, u64 size, VideoCommon::CacheType which) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (addr == 0 || size == 0) {
return;
}
if (True(which & VideoCommon::CacheType::TextureCache)) {
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
if (True(which & VideoCommon::CacheType::BufferCache)) {
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
if (True(which & VideoCommon::CacheType::ShaderCache)) {
shader_cache.InvalidateRegion(addr, size);
}
if (True(which & VideoCommon::CacheType::QueryCache)) {
query_cache.InvalidateRegion(addr, size);
}
}
bool RasterizerOpenGL::OnCPUWrite(DAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (addr == 0 || size == 0) {
return false;
}
{
std::scoped_lock lock{buffer_cache.mutex};
if (buffer_cache.OnCPUWrite(addr, size)) {
return true;
}
}
{
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
shader_cache.InvalidateRegion(addr, size);
return false;
}
void RasterizerOpenGL::OnCacheInvalidation(DAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
if (addr == 0 || size == 0) {
return;
}
{
std::scoped_lock lock{texture_cache.mutex};
texture_cache.WriteMemory(addr, size);
}
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
shader_cache.InvalidateRegion(addr, size);
}
void RasterizerOpenGL::InvalidateGPUCache() {
gpu.InvalidateGPUCache();
}
void RasterizerOpenGL::UnmapMemory(DAddr addr, u64 size) {
{
std::scoped_lock lock{texture_cache.mutex};
texture_cache.UnmapMemory(addr, size);
}
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.WriteMemory(addr, size);
}
shader_cache.OnCacheInvalidation(addr, size);
}
void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
{
std::scoped_lock lock{texture_cache.mutex};
texture_cache.UnmapGPUMemory(as_id, addr, size);
}
}
void RasterizerOpenGL::SignalFence(std::function<void()>&& func) {
fence_manager.SignalFence(std::move(func));
}
void RasterizerOpenGL::SyncOperation(std::function<void()>&& func) {
fence_manager.SyncOperation(std::move(func));
}
void RasterizerOpenGL::SignalSyncPoint(u32 value) {
fence_manager.SignalSyncPoint(value);
}
void RasterizerOpenGL::SignalReference() {
fence_manager.SignalOrdering();
}
void RasterizerOpenGL::ReleaseFences(bool force) {
fence_manager.WaitPendingFences(force);
}
void RasterizerOpenGL::FlushAndInvalidateRegion(DAddr addr, u64 size,
VideoCommon::CacheType which) {
if (Settings::IsGPULevelExtreme()) {
FlushRegion(addr, size, which);
}
InvalidateRegion(addr, size, which);
}
void RasterizerOpenGL::WaitForIdle() {
glMemoryBarrier(GL_ALL_BARRIER_BITS);
SignalReference();
}
void RasterizerOpenGL::FragmentBarrier() {
glTextureBarrier();
glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT);
}
void RasterizerOpenGL::TiledCacheBarrier() {
glTextureBarrier();
}
void RasterizerOpenGL::FlushCommands() {
// Only flush when we have commands queued to OpenGL.
if (num_queued_commands == 0) {
return;
}
num_queued_commands = 0;
    // Make sure memory stored from the previous GL command stream is visible.
    // This is only needed with assembly shaders, where we write to GPU memory through raw
    // pointers.
if (has_written_global_memory) {
has_written_global_memory = false;
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
}
glFlush();
}
void RasterizerOpenGL::TickFrame() {
// Ticking a frame means that buffers will be swapped, calling glFlush implicitly.
num_queued_commands = 0;
fence_manager.TickFrame();
{
std::scoped_lock lock{texture_cache.mutex};
texture_cache.TickFrame();
}
{
std::scoped_lock lock{buffer_cache.mutex};
buffer_cache.TickFrame();
}
}
bool RasterizerOpenGL::AccelerateConditionalRendering() {
gpu_memory->FlushCaching();
if (Settings::IsGPULevelHigh()) {
        // TODO: Reimplement host conditional rendering.
return false;
}
// Medium / Low Hack: stub any checks on queries written into the buffer cache.
const GPUVAddr condition_address{maxwell3d->regs.render_enable.Address()};
Maxwell::ReportSemaphore::Compare cmp;
if (gpu_memory->IsMemoryDirty(condition_address, sizeof(cmp),
VideoCommon::CacheType::BufferCache)) {
return true;
}
return false;
}
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Config& copy_config) {
MICROPROFILE_SCOPE(OpenGL_Blits);
std::scoped_lock lock{texture_cache.mutex};
return texture_cache.BlitImage(dst, src, copy_config);
}
Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() {
return accelerate_dma;
}
void RasterizerOpenGL::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
std::span<const u8> memory) {
auto cpu_addr = gpu_memory->GpuToCpuAddress(address);
if (!cpu_addr) [[unlikely]] {
gpu_memory->WriteBlock(address, memory.data(), copy_size);
return;
}
gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size);
{
std::unique_lock<std::recursive_mutex> lock{buffer_cache.mutex};
if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
buffer_cache.WriteMemory(*cpu_addr, copy_size);
}
}
{
std::scoped_lock lock_texture{texture_cache.mutex};
texture_cache.WriteMemory(*cpu_addr, copy_size);
}
shader_cache.InvalidateRegion(*cpu_addr, copy_size);
query_cache.InvalidateRegion(*cpu_addr, copy_size);
}
std::optional<FramebufferTextureInfo> RasterizerOpenGL::AccelerateDisplay(
const Tegra::FramebufferConfig& config, DAddr framebuffer_addr, u32 pixel_stride) {
if (framebuffer_addr == 0) {
return {};
}
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
std::scoped_lock lock{texture_cache.mutex};
const auto [image_view, scaled] =
texture_cache.TryFindFramebufferImageView(config, framebuffer_addr);
if (!image_view) {
return {};
}
const auto& resolution = Settings::values.resolution_info;
FramebufferTextureInfo info{};
info.display_texture = image_view->Handle(Shader::TextureType::Color2D);
info.width = image_view->size.width;
info.height = image_view->size.height;
info.scaled_width = scaled ? resolution.ScaleUp(info.width) : info.width;
info.scaled_height = scaled ? resolution.ScaleUp(info.height) : info.height;
return info;
}
void RasterizerOpenGL::SyncState() {
SyncViewport();
SyncRasterizeEnable();
SyncPolygonModes();
SyncColorMask();
SyncFragmentColorClampState();
SyncMultiSampleState();
SyncDepthTestState();
SyncDepthClamp();
SyncStencilTestState();
SyncBlendState();
SyncLogicOpState();
SyncCullMode();
SyncPrimitiveRestart();
SyncScissorTest();
SyncPointState();
SyncLineState();
SyncPolygonOffset();
SyncAlphaTest();
SyncFramebufferSRGB();
SyncVertexFormats();
SyncVertexInstances();
}
void RasterizerOpenGL::SyncViewport() {
auto& flags = maxwell3d->dirty.flags;
const auto& regs = maxwell3d->regs;
const bool rescale_viewports = flags[VideoCommon::Dirty::RescaleViewports];
const bool dirty_viewport = flags[Dirty::Viewports] || rescale_viewports;
const bool dirty_clip_control = flags[Dirty::ClipControl];
if (dirty_viewport || dirty_clip_control || flags[Dirty::FrontFace]) {
flags[Dirty::FrontFace] = false;
GLenum mode = MaxwellToGL::FrontFace(regs.gl_front_face);
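        // The winding is flipped by default to account for the inverted window origin; a
        // flipped Y window origin and a negative viewport Y scale each toggle it back.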
bool flip_faces = true;
if (regs.window_origin.flip_y != 0) {
flip_faces = !flip_faces;
}
if (regs.viewport_transform[0].scale_y < 0.0f) {
flip_faces = !flip_faces;
}
if (flip_faces) {
switch (mode) {
case GL_CW:
mode = GL_CCW;
break;
case GL_CCW:
mode = GL_CW;
break;
}
}
glFrontFace(mode);
}
if (dirty_viewport || dirty_clip_control) {
flags[Dirty::ClipControl] = false;
bool flip_y = false;
if (regs.viewport_transform[0].scale_y < 0.0f) {
flip_y = !flip_y;
}
const bool lower_left{regs.window_origin.mode != Maxwell::WindowOrigin::Mode::UpperLeft};
if (lower_left) {
flip_y = !flip_y;
}
const bool is_zero_to_one = regs.depth_mode == Maxwell::DepthMode::ZeroToOne;
const GLenum origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
const GLenum depth = is_zero_to_one ? GL_ZERO_TO_ONE : GL_NEGATIVE_ONE_TO_ONE;
state_tracker.ClipControl(origin, depth);
state_tracker.SetYNegate(lower_left);
}
const bool is_rescaling{texture_cache.IsRescaling()};
const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f;
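    // When downscaling, round the scaled magnitude to the nearest integer (preserving the
    // sign) so viewport coordinates do not end up at sub-pixel positions.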
const auto conv = [scale](float value) -> GLfloat {
float new_value = value * scale;
if (scale < 1.0f) {
const bool sign = std::signbit(value);
new_value = std::round(std::abs(new_value));
new_value = sign ? -new_value : new_value;
}
return static_cast<GLfloat>(new_value);
};
if (dirty_viewport) {
flags[Dirty::Viewports] = false;
const bool force = flags[Dirty::ViewportTransform] || rescale_viewports;
flags[Dirty::ViewportTransform] = false;
flags[VideoCommon::Dirty::RescaleViewports] = false;
for (size_t index = 0; index < Maxwell::NumViewports; ++index) {
if (!force && !flags[Dirty::Viewport0 + index]) {
continue;
}
flags[Dirty::Viewport0 + index] = false;
if (!regs.viewport_scale_offset_enabled) {
const auto x = static_cast<GLfloat>(regs.surface_clip.x);
const auto y = static_cast<GLfloat>(regs.surface_clip.y);
const auto width = static_cast<GLfloat>(regs.surface_clip.width);
const auto height = static_cast<GLfloat>(regs.surface_clip.height);
glViewportIndexedf(static_cast<GLuint>(index), x, y, width != 0.0f ? width : 1.0f,
height != 0.0f ? height : 1.0f);
continue;
}
const auto& src = regs.viewport_transform[index];
GLfloat x = conv(src.translate_x - src.scale_x);
GLfloat y = conv(src.translate_y - src.scale_y);
GLfloat width = conv(src.scale_x * 2.0f);
GLfloat height = conv(src.scale_y * 2.0f);
if (height < 0) {
y += height;
height = -height;
}
glViewportIndexedf(static_cast<GLuint>(index), x, y, width != 0.0f ? width : 1.0f,
height != 0.0f ? height : 1.0f);
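        // In [-1, 1] depth mode the near plane is translate_z - scale_z; in [0, 1] mode it is
        // translate_z itself, which is selected through reduce_z below.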
const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z;
const GLdouble far_depth = src.translate_z + src.scale_z;
if (device.HasDepthBufferFloat()) {
glDepthRangeIndexeddNV(static_cast<GLuint>(index), near_depth, far_depth);
} else {
glDepthRangeIndexed(static_cast<GLuint>(index), near_depth, far_depth);
}
if (!GLAD_GL_NV_viewport_swizzle) {
continue;
}
glViewportSwizzleNV(static_cast<GLuint>(index),
MaxwellToGL::ViewportSwizzle(src.swizzle.x),
MaxwellToGL::ViewportSwizzle(src.swizzle.y),
MaxwellToGL::ViewportSwizzle(src.swizzle.z),
MaxwellToGL::ViewportSwizzle(src.swizzle.w));
}
}
}
void RasterizerOpenGL::SyncDepthClamp() {
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::DepthClampEnabled]) {
return;
}
flags[Dirty::DepthClampEnabled] = false;
bool depth_clamp_disabled{maxwell3d->regs.viewport_clip_control.geometry_clip ==
Maxwell::ViewportClipControl::GeometryClip::Passthrough ||
maxwell3d->regs.viewport_clip_control.geometry_clip ==
Maxwell::ViewportClipControl::GeometryClip::FrustumXYZ ||
maxwell3d->regs.viewport_clip_control.geometry_clip ==
Maxwell::ViewportClipControl::GeometryClip::FrustumZ};
oglEnable(GL_DEPTH_CLAMP, !depth_clamp_disabled);
}
void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::ClipDistances] && !flags[VideoCommon::Dirty::Shaders]) {
return;
}
flags[Dirty::ClipDistances] = false;
clip_mask &= maxwell3d->regs.user_clip_enable.raw;
if (clip_mask == last_clip_distance_mask) {
return;
}
last_clip_distance_mask = clip_mask;
for (std::size_t i = 0; i < Maxwell::Regs::NumClipDistances; ++i) {
oglEnable(static_cast<GLenum>(GL_CLIP_DISTANCE0 + i), (clip_mask >> i) & 1);
}
}
void RasterizerOpenGL::SyncClipCoef() {
UNIMPLEMENTED();
}
void RasterizerOpenGL::SyncCullMode() {
auto& flags = maxwell3d->dirty.flags;
const auto& regs = maxwell3d->regs;
if (flags[Dirty::CullTest]) {
flags[Dirty::CullTest] = false;
if (regs.gl_cull_test_enabled) {
glEnable(GL_CULL_FACE);
glCullFace(MaxwellToGL::CullFace(regs.gl_cull_face));
} else {
glDisable(GL_CULL_FACE);
}
}
}
void RasterizerOpenGL::SyncPrimitiveRestart() {
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::PrimitiveRestart]) {
return;
}
flags[Dirty::PrimitiveRestart] = false;
if (maxwell3d->regs.primitive_restart.enabled) {
glEnable(GL_PRIMITIVE_RESTART);
glPrimitiveRestartIndex(maxwell3d->regs.primitive_restart.index);
} else {
glDisable(GL_PRIMITIVE_RESTART);
}
}
void RasterizerOpenGL::SyncDepthTestState() {
auto& flags = maxwell3d->dirty.flags;
const auto& regs = maxwell3d->regs;
if (flags[Dirty::DepthMask]) {
flags[Dirty::DepthMask] = false;
glDepthMask(regs.depth_write_enabled ? GL_TRUE : GL_FALSE);
}
if (flags[Dirty::DepthTest]) {
flags[Dirty::DepthTest] = false;
if (regs.depth_test_enable) {
glEnable(GL_DEPTH_TEST);
glDepthFunc(MaxwellToGL::ComparisonOp(regs.depth_test_func));
} else {
glDisable(GL_DEPTH_TEST);
}
}
}
void RasterizerOpenGL::SyncStencilTestState() {
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::StencilTest]) {
return;
}
flags[Dirty::StencilTest] = false;
const auto& regs = maxwell3d->regs;
oglEnable(GL_STENCIL_TEST, regs.stencil_enable);
glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_op.func),
regs.stencil_front_ref, regs.stencil_front_func_mask);
glStencilOpSeparate(GL_FRONT, MaxwellToGL::StencilOp(regs.stencil_front_op.fail),
MaxwellToGL::StencilOp(regs.stencil_front_op.zfail),
MaxwellToGL::StencilOp(regs.stencil_front_op.zpass));
glStencilMaskSeparate(GL_FRONT, regs.stencil_front_mask);
if (regs.stencil_two_side_enable) {
glStencilFuncSeparate(GL_BACK, MaxwellToGL::ComparisonOp(regs.stencil_back_op.func),
regs.stencil_back_ref, regs.stencil_back_func_mask);
glStencilOpSeparate(GL_BACK, MaxwellToGL::StencilOp(regs.stencil_back_op.fail),
MaxwellToGL::StencilOp(regs.stencil_back_op.zfail),
MaxwellToGL::StencilOp(regs.stencil_back_op.zpass));
glStencilMaskSeparate(GL_BACK, regs.stencil_back_mask);
} else {
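        // Two-sided stencil is disabled, so reset the back face to permissive defaults.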
glStencilFuncSeparate(GL_BACK, GL_ALWAYS, 0, 0xFFFFFFFF);
glStencilOpSeparate(GL_BACK, GL_KEEP, GL_KEEP, GL_KEEP);
glStencilMaskSeparate(GL_BACK, 0xFFFFFFFF);
}
}
void RasterizerOpenGL::SyncRasterizeEnable() {
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::RasterizeEnable]) {
return;
}
flags[Dirty::RasterizeEnable] = false;
oglEnable(GL_RASTERIZER_DISCARD, maxwell3d->regs.rasterize_enable == 0);
}
void RasterizerOpenGL::SyncPolygonModes() {
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::PolygonModes]) {
return;
}
flags[Dirty::PolygonModes] = false;
const auto& regs = maxwell3d->regs;
if (regs.fill_via_triangle_mode != Maxwell::FillViaTriangleMode::Disabled) {
if (!GLAD_GL_NV_fill_rectangle) {
LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used and not supported");
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
return;
}
flags[Dirty::PolygonModeFront] = true;
flags[Dirty::PolygonModeBack] = true;
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL_RECTANGLE_NV);
return;
}
if (regs.polygon_mode_front == regs.polygon_mode_back) {
flags[Dirty::PolygonModeFront] = false;
flags[Dirty::PolygonModeBack] = false;
glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_front));
return;
}
if (flags[Dirty::PolygonModeFront]) {
flags[Dirty::PolygonModeFront] = false;
glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(regs.polygon_mode_front));
}
if (flags[Dirty::PolygonModeBack]) {
flags[Dirty::PolygonModeBack] = false;
glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(regs.polygon_mode_back));
}
}
void RasterizerOpenGL::SyncColorMask() {
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::ColorMasks]) {
return;
}
flags[Dirty::ColorMasks] = false;
const bool force = flags[Dirty::ColorMaskCommon];
flags[Dirty::ColorMaskCommon] = false;
const auto& regs = maxwell3d->regs;
if (regs.color_mask_common) {
if (!force && !flags[Dirty::ColorMask0]) {
return;
}
flags[Dirty::ColorMask0] = false;
auto& mask = regs.color_mask[0];
        glColorMask(mask.R != 0, mask.G != 0, mask.B != 0, mask.A != 0);
return;
}
// Path without color_mask_common set
for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
if (!force && !flags[Dirty::ColorMask0 + i]) {
continue;
}
flags[Dirty::ColorMask0 + i] = false;
const auto& mask = regs.color_mask[i];
glColorMaski(static_cast<GLuint>(i), mask.R != 0, mask.G != 0, mask.B != 0, mask.A != 0);
}
}
void RasterizerOpenGL::SyncMultiSampleState() {
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::MultisampleControl]) {
return;
}
flags[Dirty::MultisampleControl] = false;
const auto& regs = maxwell3d->regs;
oglEnable(GL_SAMPLE_ALPHA_TO_COVERAGE, regs.anti_alias_alpha_control.alpha_to_coverage);
oglEnable(GL_SAMPLE_ALPHA_TO_ONE, regs.anti_alias_alpha_control.alpha_to_one);
}
void RasterizerOpenGL::SyncFragmentColorClampState() {
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::FragmentClampColor]) {
return;
}
flags[Dirty::FragmentClampColor] = false;
glClampColor(GL_CLAMP_FRAGMENT_COLOR,
maxwell3d->regs.frag_color_clamp.AnyEnabled() ? GL_TRUE : GL_FALSE);
}
void RasterizerOpenGL::SyncBlendState() {
auto& flags = maxwell3d->dirty.flags;
const auto& regs = maxwell3d->regs;
if (flags[Dirty::BlendColor]) {
flags[Dirty::BlendColor] = false;
glBlendColor(regs.blend_color.r, regs.blend_color.g, regs.blend_color.b,
regs.blend_color.a);
}
// TODO(Rodrigo): Revisit blending, there are several registers we are not reading
if (!flags[Dirty::BlendStates]) {
return;
}
flags[Dirty::BlendStates] = false;
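    // When independent per-target blending is disabled, render target 0's blend state is
    // applied to all attachments.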
if (!regs.blend_per_target_enabled) {
if (!regs.blend.enable[0]) {
glDisable(GL_BLEND);
return;
}
glEnable(GL_BLEND);
glBlendFuncSeparate(MaxwellToGL::BlendFunc(regs.blend.color_source),
MaxwellToGL::BlendFunc(regs.blend.color_dest),
MaxwellToGL::BlendFunc(regs.blend.alpha_source),
MaxwellToGL::BlendFunc(regs.blend.alpha_dest));
glBlendEquationSeparate(MaxwellToGL::BlendEquation(regs.blend.color_op),
MaxwellToGL::BlendEquation(regs.blend.alpha_op));
return;
}
const bool force = flags[Dirty::BlendIndependentEnabled];
flags[Dirty::BlendIndependentEnabled] = false;
for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
if (!force && !flags[Dirty::BlendState0 + i]) {
continue;
}
flags[Dirty::BlendState0 + i] = false;
if (!regs.blend.enable[i]) {
glDisablei(GL_BLEND, static_cast<GLuint>(i));
continue;
}
glEnablei(GL_BLEND, static_cast<GLuint>(i));
const auto& src = regs.blend_per_target[i];
glBlendFuncSeparatei(static_cast<GLuint>(i), MaxwellToGL::BlendFunc(src.color_source),
MaxwellToGL::BlendFunc(src.color_dest),
MaxwellToGL::BlendFunc(src.alpha_source),
MaxwellToGL::BlendFunc(src.alpha_dest));
glBlendEquationSeparatei(static_cast<GLuint>(i), MaxwellToGL::BlendEquation(src.color_op),
MaxwellToGL::BlendEquation(src.alpha_op));
}
}
void RasterizerOpenGL::SyncLogicOpState() {
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::LogicOp]) {
return;
}
flags[Dirty::LogicOp] = false;
const auto& regs = maxwell3d->regs;
if (regs.logic_op.enable) {
glEnable(GL_COLOR_LOGIC_OP);
glLogicOp(MaxwellToGL::LogicOp(regs.logic_op.op));
} else {
glDisable(GL_COLOR_LOGIC_OP);
}
}
void RasterizerOpenGL::SyncScissorTest() {
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::Scissors] && !flags[VideoCommon::Dirty::RescaleScissors]) {
return;
}
flags[Dirty::Scissors] = false;
const bool force = flags[VideoCommon::Dirty::RescaleScissors];
flags[VideoCommon::Dirty::RescaleScissors] = false;
const auto& regs = maxwell3d->regs;
const auto& resolution = Settings::values.resolution_info;
const bool is_rescaling{texture_cache.IsRescaling()};
const u32 up_scale = is_rescaling ? resolution.up_scale : 1U;
const u32 down_shift = is_rescaling ? resolution.down_shift : 0U;
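    // Integer rescale of scissor coordinates: when the effective scale is fractional
    // (up_scale >> down_shift == 0), a parity-based correction compensates for truncation,
    // and results are clamped to at least 1.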
const auto scale_up = [up_scale, down_shift](u32 value) -> u32 {
if (value == 0) {
return 0U;
}
const u32 upset = value * up_scale;
        u32 accum{};
        if ((up_scale >> down_shift) == 0) {
            accum = upset % 2;
        }
        const u32 converted_value = upset >> down_shift;
        return std::max<u32>(converted_value + accum, 1U);
};
for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) {
if (!force && !flags[Dirty::Scissor0 + index]) {
continue;
}
flags[Dirty::Scissor0 + index] = false;
const auto& src = regs.scissor_test[index];
if (src.enable) {
glEnablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
glScissorIndexed(static_cast<GLuint>(index), scale_up(src.min_x), scale_up(src.min_y),
scale_up(src.max_x - src.min_x), scale_up(src.max_y - src.min_y));
} else {
glDisablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
}
}
}
void RasterizerOpenGL::SyncPointState() {
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::PointSize]) {
return;
}
flags[Dirty::PointSize] = false;
oglEnable(GL_POINT_SPRITE, maxwell3d->regs.point_sprite_enable);
oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d->regs.point_size_attribute.enabled);
const bool is_rescaling{texture_cache.IsRescaling()};
const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f;
glPointSize(std::max(1.0f, maxwell3d->regs.point_size * scale));
}
void RasterizerOpenGL::SyncLineState() {
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::LineWidth]) {
return;
}
flags[Dirty::LineWidth] = false;
const auto& regs = maxwell3d->regs;
oglEnable(GL_LINE_SMOOTH, regs.line_anti_alias_enable);
glLineWidth(regs.line_anti_alias_enable ? regs.line_width_smooth : regs.line_width_aliased);
}
void RasterizerOpenGL::SyncPolygonOffset() {
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::PolygonOffset]) {
return;
}
flags[Dirty::PolygonOffset] = false;
const auto& regs = maxwell3d->regs;
oglEnable(GL_POLYGON_OFFSET_FILL, regs.polygon_offset_fill_enable);
oglEnable(GL_POLYGON_OFFSET_LINE, regs.polygon_offset_line_enable);
oglEnable(GL_POLYGON_OFFSET_POINT, regs.polygon_offset_point_enable);
if (regs.polygon_offset_fill_enable || regs.polygon_offset_line_enable ||
regs.polygon_offset_point_enable) {
// Hardware divides polygon offset units by two
glPolygonOffsetClamp(regs.slope_scale_depth_bias, regs.depth_bias / 2.0f,
regs.depth_bias_clamp);
}
}
void RasterizerOpenGL::SyncAlphaTest() {
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::AlphaTest]) {
return;
}
flags[Dirty::AlphaTest] = false;
const auto& regs = maxwell3d->regs;
if (regs.alpha_test_enabled) {
glEnable(GL_ALPHA_TEST);
glAlphaFunc(MaxwellToGL::ComparisonOp(regs.alpha_test_func), regs.alpha_test_ref);
} else {
glDisable(GL_ALPHA_TEST);
}
}
void RasterizerOpenGL::SyncFramebufferSRGB() {
auto& flags = maxwell3d->dirty.flags;
if (!flags[Dirty::FramebufferSRGB]) {
return;
}
flags[Dirty::FramebufferSRGB] = false;
oglEnable(GL_FRAMEBUFFER_SRGB, maxwell3d->regs.framebuffer_srgb);
}
void RasterizerOpenGL::BeginTransformFeedback(GraphicsPipeline* program, GLenum primitive_mode) {
const auto& regs = maxwell3d->regs;
if (regs.transform_feedback_enabled == 0) {
return;
}
program->ConfigureTransformFeedback();
UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderType::TessellationInit) ||
regs.IsShaderConfigEnabled(Maxwell::ShaderType::Tessellation));
    // We may have to call BeginTransformFeedbackNV here since it seems to resolve to a
    // different implementation on Nvidia's driver (the function pointer differs). However, we
    // use ARB_transform_feedback3 features with NV_transform_feedback interactions, and the
    // ARB extension doesn't define BeginTransformFeedback (without NV) interactions. It just
    // works.
glBeginTransformFeedback(primitive_mode);
}
void RasterizerOpenGL::EndTransformFeedback() {
if (maxwell3d->regs.transform_feedback_enabled != 0) {
glEndTransformFeedback();
}
}
void RasterizerOpenGL::InitializeChannel(Tegra::Control::ChannelState& channel) {
CreateChannel(channel);
{
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.CreateChannel(channel);
buffer_cache.CreateChannel(channel);
}
shader_cache.CreateChannel(channel);
query_cache.CreateChannel(channel);
state_tracker.SetupTables(channel);
}
void RasterizerOpenGL::BindChannel(Tegra::Control::ChannelState& channel) {
const s32 channel_id = channel.bind_id;
BindToChannel(channel_id);
{
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.BindToChannel(channel_id);
buffer_cache.BindToChannel(channel_id);
}
shader_cache.BindToChannel(channel_id);
query_cache.BindToChannel(channel_id);
state_tracker.ChangeChannel(channel);
state_tracker.InvalidateState();
}
void RasterizerOpenGL::ReleaseChannel(s32 channel_id) {
EraseChannel(channel_id);
{
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
texture_cache.EraseChannel(channel_id);
buffer_cache.EraseChannel(channel_id);
}
shader_cache.EraseChannel(channel_id);
query_cache.EraseChannel(channel_id);
}
void RasterizerOpenGL::RegisterTransformFeedback(GPUVAddr tfb_object_addr) {
buffer_cache_runtime.BindTransformFeedbackObject(tfb_object_addr);
}
AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_, TextureCache& texture_cache_)
: buffer_cache{buffer_cache_}, texture_cache{texture_cache_} {}
bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
std::scoped_lock lock{buffer_cache.mutex};
return buffer_cache.DMACopy(src_address, dest_address, amount);
}
bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) {
std::scoped_lock lock{buffer_cache.mutex};
return buffer_cache.DMAClear(src_address, amount, value);
}
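// Attempts to service a DMA copy between guest memory and an image entirely on the host GPU.
// Returns false when the texture cache does not track the image (or, for downloads, when the
// buffer offset is not block-aligned), in which case the caller falls back to a CPU path.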
template <bool IS_IMAGE_UPLOAD>
bool AccelerateDMA::DmaBufferImageCopy(const Tegra::DMA::ImageCopy& copy_info,
const Tegra::DMA::BufferOperand& buffer_operand,
const Tegra::DMA::ImageOperand& image_operand) {
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
const auto image_id = texture_cache.DmaImageId(image_operand, IS_IMAGE_UPLOAD);
if (image_id == VideoCommon::NULL_IMAGE_ID) {
return false;
}
const u32 buffer_size = static_cast<u32>(buffer_operand.pitch * buffer_operand.height);
static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize;
const auto post_op = IS_IMAGE_UPLOAD ? VideoCommon::ObtainBufferOperation::DoNothing
: VideoCommon::ObtainBufferOperation::MarkAsWritten;
const auto [buffer, offset] =
buffer_cache.ObtainBuffer(buffer_operand.address, buffer_size, sync_info, post_op);
const auto [image, copy] = texture_cache.DmaBufferImageCopy(
copy_info, buffer_operand, image_operand, image_id, IS_IMAGE_UPLOAD);
    const std::span copy_span{&copy, 1};
if constexpr (IS_IMAGE_UPLOAD) {
texture_cache.PrepareImage(image_id, true, false);
image->UploadMemory(buffer->Handle(), offset, copy_span);
} else {
if (offset % BytesPerBlock(image->info.format)) {
return false;
}
texture_cache.DownloadImageIntoBuffer(image, buffer->Handle(), offset, copy_span,
buffer_operand.address, buffer_size);
}
return true;
}
bool AccelerateDMA::ImageToBuffer(const Tegra::DMA::ImageCopy& copy_info,
const Tegra::DMA::ImageOperand& image_operand,
const Tegra::DMA::BufferOperand& buffer_operand) {
return DmaBufferImageCopy<false>(copy_info, buffer_operand, image_operand);
}
bool AccelerateDMA::BufferToImage(const Tegra::DMA::ImageCopy& copy_info,
const Tegra::DMA::BufferOperand& buffer_operand,
const Tegra::DMA::ImageOperand& image_operand) {
return DmaBufferImageCopy<true>(copy_info, buffer_operand, image_operand);
}
} // namespace OpenGL