diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.cpp | 1381 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer_cache.h | 575 |
2 files changed, 0 insertions, 1956 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp deleted file mode 100644 index e27da1fa7..000000000 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ /dev/null @@ -1,1381 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <optional> -#include <glad/glad.h> - -#include "common/alignment.h" -#include "common/assert.h" -#include "common/logging/log.h" -#include "common/microprofile.h" -#include "common/scope_exit.h" -#include "core/core.h" -#include "core/hle/kernel/process.h" -#include "core/settings.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/memory_manager.h" -#include "video_core/morton.h" -#include "video_core/renderer_opengl/gl_rasterizer.h" -#include "video_core/renderer_opengl/gl_rasterizer_cache.h" -#include "video_core/renderer_opengl/utils.h" -#include "video_core/surface.h" -#include "video_core/textures/convert.h" -#include "video_core/textures/decoders.h" - -namespace OpenGL { - -using VideoCore::MortonSwizzle; -using VideoCore::MortonSwizzleMode; -using VideoCore::Surface::ComponentTypeFromDepthFormat; -using VideoCore::Surface::ComponentTypeFromRenderTarget; -using VideoCore::Surface::ComponentTypeFromTexture; -using VideoCore::Surface::PixelFormatFromDepthFormat; -using VideoCore::Surface::PixelFormatFromRenderTargetFormat; -using VideoCore::Surface::PixelFormatFromTextureFormat; -using VideoCore::Surface::SurfaceTargetFromTextureType; - -struct FormatTuple { - GLint internal_format; - GLenum format; - GLenum type; - ComponentType component_type; - bool compressed; -}; - -static void ApplyTextureDefaults(GLuint texture, u32 max_mip_level) { - glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1); - if (max_mip_level == 1) { - glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0); - } -} - -void SurfaceParams::InitCacheParameters(GPUVAddr gpu_addr_) { - auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; - - gpu_addr = gpu_addr_; - host_ptr = memory_manager.GetPointer(gpu_addr_); - size_in_bytes = SizeInBytesRaw(); - - if (IsPixelFormatASTC(pixel_format)) { - // ASTC is uncompressed in software, in emulated as RGBA8 - size_in_bytes_gl = width * height * depth * 4; - } else { - size_in_bytes_gl = SizeInBytesGL(); - } -} - -std::size_t SurfaceParams::InnerMipmapMemorySize(u32 mip_level, bool force_gl, bool layer_only, - bool uncompressed) const { - const u32 tile_x{GetDefaultBlockWidth(pixel_format)}; - const u32 tile_y{GetDefaultBlockHeight(pixel_format)}; - const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)}; - u32 m_depth = (layer_only ? 1U : depth); - u32 m_width = MipWidth(mip_level); - u32 m_height = MipHeight(mip_level); - m_width = uncompressed ? m_width : std::max(1U, (m_width + tile_x - 1) / tile_x); - m_height = uncompressed ? m_height : std::max(1U, (m_height + tile_y - 1) / tile_y); - m_depth = std::max(1U, m_depth >> mip_level); - u32 m_block_height = MipBlockHeight(mip_level); - u32 m_block_depth = MipBlockDepth(mip_level); - return Tegra::Texture::CalculateSize(force_gl ? false : is_tiled, bytes_per_pixel, m_width, - m_height, m_depth, m_block_height, m_block_depth); -} - -std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only, - bool uncompressed) const { - std::size_t block_size_bytes = Tegra::Texture::GetGOBSize() * block_height * block_depth; - std::size_t size = 0; - for (u32 i = 0; i < max_mip_level; i++) { - size += InnerMipmapMemorySize(i, force_gl, layer_only, uncompressed); - } - if (!force_gl && is_tiled) { - size = Common::AlignUp(size, block_size_bytes); - } - return size; -} - -/*static*/ SurfaceParams SurfaceParams::CreateForTexture( - const Tegra::Texture::FullTextureInfo& config, const GLShader::SamplerEntry& entry) { - SurfaceParams params{}; - params.is_tiled = config.tic.IsTiled(); - params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0, - params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0, - params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0, - params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1; - params.srgb_conversion = config.tic.IsSrgbConversionEnabled(); - params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), - params.srgb_conversion); - - if (config.tsc.depth_compare_enabled) { - // Some titles create a 'R16U' (normalized 16-bit) texture with depth_compare enabled, - // then attempt to sample from it via a shadow sampler. Convert format to Z16 (which also - // causes GetFormatType to properly return 'Depth' below). - if (GetFormatType(params.pixel_format) == SurfaceType::ColorTexture) { - switch (params.pixel_format) { - case PixelFormat::R16S: - case PixelFormat::R16U: - case PixelFormat::R16F: - params.pixel_format = PixelFormat::Z16; - break; - case PixelFormat::R32F: - params.pixel_format = PixelFormat::Z32F; - break; - default: - LOG_WARNING(HW_GPU, "Color texture format being used with depth compare: {}", - static_cast<u32>(params.pixel_format)); - break; - } - } - } - - params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value()); - params.type = GetFormatType(params.pixel_format); - UNIMPLEMENTED_IF(params.type == SurfaceType::ColorTexture && config.tsc.depth_compare_enabled); - - params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format)); - params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format)); - if (config.tic.IsLineal()) { - params.pitch = config.tic.Pitch(); - } - params.unaligned_height = config.tic.Height(); - params.target = SurfaceTargetFromTextureType(config.tic.texture_type); - params.identity = SurfaceClass::Uploaded; - - switch (params.target) { - case SurfaceTarget::Texture1D: - case SurfaceTarget::TextureBuffer: - case SurfaceTarget::Texture2D: - params.depth = 1; - break; - case SurfaceTarget::TextureCubemap: - params.depth = config.tic.Depth() * 6; - break; - case SurfaceTarget::Texture3D: - params.depth = config.tic.Depth(); - break; - case SurfaceTarget::Texture2DArray: - params.depth = config.tic.Depth(); - if (!entry.IsArray()) { - // TODO(bunnei): We have seen games re-use a Texture2D as Texture2DArray with depth of - // one, but sample the texture in the shader as if it were not an array texture. This - // probably is valid on hardware, but we still need to write a test to confirm this. In - // emulation, the workaround here is to continue to treat this as a Texture2D. An - // example game that does this is Super Mario Odyssey (in Cloud Kingdom). - ASSERT(params.depth == 1); - params.target = SurfaceTarget::Texture2D; - } - break; - case SurfaceTarget::TextureCubeArray: - params.depth = config.tic.Depth() * 6; - if (!entry.IsArray()) { - ASSERT(params.depth == 6); - params.target = SurfaceTarget::TextureCubemap; - } - break; - default: - LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast<u32>(params.target)); - UNREACHABLE(); - params.depth = 1; - break; - } - - params.is_layered = SurfaceTargetIsLayered(params.target); - params.is_array = SurfaceTargetIsArray(params.target); - params.max_mip_level = config.tic.max_mip_level + 1; - params.rt = {}; - - params.InitCacheParameters(config.tic.Address()); - - return params; -} - -/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(std::size_t index) { - const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]}; - SurfaceParams params{}; - - params.is_tiled = - config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; - params.block_width = 1 << config.memory_layout.block_width; - params.block_height = 1 << config.memory_layout.block_height; - params.block_depth = 1 << config.memory_layout.block_depth; - params.tile_width_spacing = 1; - params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); - params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || - config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; - params.component_type = ComponentTypeFromRenderTarget(config.format); - params.type = GetFormatType(params.pixel_format); - if (params.is_tiled) { - params.width = config.width; - } else { - params.pitch = config.width; - const u32 bpp = params.GetFormatBpp() / 8; - params.width = params.pitch / bpp; - } - params.height = config.height; - params.unaligned_height = config.height; - params.target = SurfaceTarget::Texture2D; - params.identity = SurfaceClass::RenderTarget; - params.depth = 1; - params.max_mip_level = 1; - params.is_layered = false; - - // Render target specific parameters, not used for caching - params.rt.index = static_cast<u32>(index); - params.rt.array_mode = config.array_mode; - params.rt.layer_stride = config.layer_stride; - params.rt.volume = config.volume; - params.rt.base_layer = config.base_layer; - - params.InitCacheParameters(config.Address()); - - return params; -} - -/*static*/ SurfaceParams SurfaceParams::CreateForDepthBuffer( - u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format, - u32 block_width, u32 block_height, u32 block_depth, - Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) { - SurfaceParams params{}; - - params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; - params.block_width = 1 << std::min(block_width, 5U); - params.block_height = 1 << std::min(block_height, 5U); - params.block_depth = 1 << std::min(block_depth, 5U); - params.tile_width_spacing = 1; - params.pixel_format = PixelFormatFromDepthFormat(format); - params.component_type = ComponentTypeFromDepthFormat(format); - params.type = GetFormatType(params.pixel_format); - params.srgb_conversion = false; - params.width = zeta_width; - params.height = zeta_height; - params.unaligned_height = zeta_height; - params.target = SurfaceTarget::Texture2D; - params.identity = SurfaceClass::DepthBuffer; - params.depth = 1; - params.max_mip_level = 1; - params.is_layered = false; - params.rt = {}; - - params.InitCacheParameters(zeta_address); - - return params; -} - -/*static*/ SurfaceParams SurfaceParams::CreateForFermiCopySurface( - const Tegra::Engines::Fermi2D::Regs::Surface& config) { - SurfaceParams params{}; - - params.is_tiled = !config.linear; - params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0, - params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0, - params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0, - params.tile_width_spacing = 1; - params.pixel_format = PixelFormatFromRenderTargetFormat(config.format); - params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB || - config.format == Tegra::RenderTargetFormat::RGBA8_SRGB; - params.component_type = ComponentTypeFromRenderTarget(config.format); - params.type = GetFormatType(params.pixel_format); - params.width = config.width; - params.pitch = config.pitch; - params.height = config.height; - params.unaligned_height = config.height; - params.target = SurfaceTarget::Texture2D; - params.identity = SurfaceClass::Copy; - params.depth = 1; - params.max_mip_level = 1; - params.rt = {}; - - params.InitCacheParameters(config.Address()); - - return params; -} - -static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U - {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S - {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI - {GL_RGB8, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U - {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm, - false}, // A2B10G10R10U - {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U - {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8U - {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI - {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F - {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U - {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI - {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float, - false}, // R11FG11FB10F - {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI - {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // DXT1 - {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // DXT23 - {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // DXT45 - {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1 - {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // DXN2UNORM - {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM - {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // BC7U - {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float, - true}, // BC6H_UF16 - {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float, - true}, // BC6H_SF16 - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4 - {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 - {GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F - {GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F - {GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F - {GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F - {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16U - {GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S - {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI - {GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I - {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RG16 - {GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false}, // RG16F - {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI - {GL_RG16I, GL_RG_INTEGER, GL_SHORT, ComponentType::SInt, false}, // RG16I - {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S - {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, - false}, // RGBA8_SRGB - {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U - {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S - {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI - {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8 - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5 - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4 - {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8 - // Compressed sRGB formats - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // DXT1_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // DXT23_SRGB - {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // DXT45_SRGB - {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, - true}, // BC7U_SRGB - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5 - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB - {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8 - {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB - - // Depth formats - {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F - {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm, - false}, // Z16 - - // DepthStencil formats - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, - false}, // Z24S8 - {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm, - false}, // S8Z24 - {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, - ComponentType::Float, false}, // Z32FS8 -}}; - -static GLenum SurfaceTargetToGL(SurfaceTarget target) { - switch (target) { - case SurfaceTarget::Texture1D: - return GL_TEXTURE_1D; - case SurfaceTarget::TextureBuffer: - return GL_TEXTURE_BUFFER; - case SurfaceTarget::Texture2D: - return GL_TEXTURE_2D; - case SurfaceTarget::Texture3D: - return GL_TEXTURE_3D; - case SurfaceTarget::Texture1DArray: - return GL_TEXTURE_1D_ARRAY; - case SurfaceTarget::Texture2DArray: - return GL_TEXTURE_2D_ARRAY; - case SurfaceTarget::TextureCubemap: - return GL_TEXTURE_CUBE_MAP; - case SurfaceTarget::TextureCubeArray: - return GL_TEXTURE_CUBE_MAP_ARRAY; - } - LOG_CRITICAL(Render_OpenGL, "Unimplemented texture target={}", static_cast<u32>(target)); - UNREACHABLE(); - return {}; -} - -static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) { - ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size()); - auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)]; - ASSERT(component_type == format.component_type); - - return format; -} - -/// Returns the discrepant array target -constexpr GLenum GetArrayDiscrepantTarget(SurfaceTarget target) { - switch (target) { - case SurfaceTarget::Texture1D: - return GL_TEXTURE_1D_ARRAY; - case SurfaceTarget::Texture2D: - return GL_TEXTURE_2D_ARRAY; - case SurfaceTarget::Texture3D: - return GL_NONE; - case SurfaceTarget::Texture1DArray: - return GL_TEXTURE_1D; - case SurfaceTarget::Texture2DArray: - return GL_TEXTURE_2D; - case SurfaceTarget::TextureCubemap: - return GL_TEXTURE_CUBE_MAP_ARRAY; - case SurfaceTarget::TextureCubeArray: - return GL_TEXTURE_CUBE_MAP; - } - return GL_NONE; -} - -Common::Rectangle<u32> SurfaceParams::GetRect(u32 mip_level) const { - u32 actual_height{std::max(1U, unaligned_height >> mip_level)}; - if (IsPixelFormatASTC(pixel_format)) { - // ASTC formats must stop at the ATSC block size boundary - actual_height = Common::AlignDown(actual_height, GetASTCBlockSize(pixel_format).second); - } - return {0, actual_height, MipWidth(mip_level), 0}; -} - -void SwizzleFunc(const MortonSwizzleMode& mode, const SurfaceParams& params, - std::vector<u8>& gl_buffer, u32 mip_level) { - u32 depth = params.MipDepth(mip_level); - if (params.target == SurfaceTarget::Texture2D) { - // TODO(Blinkhawk): Eliminate this condition once all texture types are implemented. - depth = 1U; - } - if (params.is_layered) { - u64 offset = params.GetMipmapLevelOffset(mip_level); - u64 offset_gl = 0; - const u64 layer_size = params.LayerMemorySize(); - const u64 gl_size = params.LayerSizeGL(mip_level); - for (u32 i = 0; i < params.depth; i++) { - MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), - params.MipBlockHeight(mip_level), params.MipHeight(mip_level), - params.MipBlockDepth(mip_level), 1, params.tile_width_spacing, - gl_buffer.data() + offset_gl, params.host_ptr + offset); - offset += layer_size; - offset_gl += gl_size; - } - } else { - const u64 offset = params.GetMipmapLevelOffset(mip_level); - MortonSwizzle(mode, params.pixel_format, params.MipWidth(mip_level), - params.MipBlockHeight(mip_level), params.MipHeight(mip_level), - params.MipBlockDepth(mip_level), depth, params.tile_width_spacing, - gl_buffer.data(), params.host_ptr + offset); - } -} - -void RasterizerCacheOpenGL::FastCopySurface(const Surface& src_surface, - const Surface& dst_surface) { - const auto& src_params{src_surface->GetSurfaceParams()}; - const auto& dst_params{dst_surface->GetSurfaceParams()}; - - const u32 width{std::min(src_params.width, dst_params.width)}; - const u32 height{std::min(src_params.height, dst_params.height)}; - - glCopyImageSubData(src_surface->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, 0, - 0, dst_surface->Texture().handle, SurfaceTargetToGL(dst_params.target), 0, 0, - 0, 0, width, height, 1); - - dst_surface->MarkAsModified(true, *this); -} - -MICROPROFILE_DEFINE(OpenGL_CopySurface, "OpenGL", "CopySurface", MP_RGB(128, 192, 64)); -void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surface& dst_surface, - const GLuint copy_pbo_handle, const GLenum src_attachment, - const GLenum dst_attachment, - const std::size_t cubemap_face) { - MICROPROFILE_SCOPE(OpenGL_CopySurface); - ASSERT_MSG(dst_attachment == 0, "Unimplemented"); - - const auto& src_params{src_surface->GetSurfaceParams()}; - const auto& dst_params{dst_surface->GetSurfaceParams()}; - - const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type); - const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type); - - const std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes); - - glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle); - glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_COPY); - if (source_format.compressed) { - glGetCompressedTextureImage(src_surface->Texture().handle, src_attachment, - static_cast<GLsizei>(src_params.size_in_bytes), nullptr); - } else { - glGetTextureImage(src_surface->Texture().handle, src_attachment, source_format.format, - source_format.type, static_cast<GLsizei>(src_params.size_in_bytes), - nullptr); - } - // If the new texture is bigger than the previous one, we need to fill in the rest with data - // from the CPU. - if (src_params.size_in_bytes < dst_params.size_in_bytes) { - // Upload the rest of the memory. - if (dst_params.is_tiled) { - // TODO(Subv): We might have to de-tile the subtexture and re-tile it with the rest - // of the data in this case. Games like Super Mario Odyssey seem to hit this case - // when drawing, it re-uses the memory of a previous texture as a bigger framebuffer - // but it doesn't clear it beforehand, the texture is already full of zeros. - LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during " - "reinterpretation but the texture is tiled."); - } - const std::size_t remaining_size = dst_params.size_in_bytes - src_params.size_in_bytes; - auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; - glBufferSubData(GL_PIXEL_PACK_BUFFER, src_params.size_in_bytes, remaining_size, - memory_manager.GetPointer(dst_params.gpu_addr + src_params.size_in_bytes)); - } - - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - - const GLsizei width{static_cast<GLsizei>( - std::min(src_params.GetRect().GetWidth(), dst_params.GetRect().GetWidth()))}; - const GLsizei height{static_cast<GLsizei>( - std::min(src_params.GetRect().GetHeight(), dst_params.GetRect().GetHeight()))}; - - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle); - if (dest_format.compressed) { - LOG_CRITICAL(HW_GPU, "Compressed copy is unimplemented!"); - UNREACHABLE(); - } else { - switch (dst_params.target) { - case SurfaceTarget::Texture1D: - glTextureSubImage1D(dst_surface->Texture().handle, 0, 0, width, dest_format.format, - dest_format.type, nullptr); - break; - case SurfaceTarget::Texture2D: - glTextureSubImage2D(dst_surface->Texture().handle, 0, 0, 0, width, height, - dest_format.format, dest_format.type, nullptr); - break; - case SurfaceTarget::Texture3D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0, 0, width, height, - static_cast<GLsizei>(dst_params.depth), dest_format.format, - dest_format.type, nullptr); - break; - case SurfaceTarget::TextureCubemap: - glTextureSubImage3D(dst_surface->Texture().handle, 0, 0, 0, - static_cast<GLint>(cubemap_face), width, height, 1, - dest_format.format, dest_format.type, nullptr); - break; - default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", - static_cast<u32>(dst_params.target)); - UNREACHABLE(); - } - glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); - } - - dst_surface->MarkAsModified(true, *this); -} - -CachedSurface::CachedSurface(const SurfaceParams& params) - : RasterizerCacheObject{params.host_ptr}, params{params}, - gl_target{SurfaceTargetToGL(params.target)}, cached_size_in_bytes{params.size_in_bytes} { - - const auto optional_cpu_addr{ - Core::System::GetInstance().GPU().MemoryManager().GpuToCpuAddress(params.gpu_addr)}; - ASSERT_MSG(optional_cpu_addr, "optional_cpu_addr is invalid"); - cpu_addr = *optional_cpu_addr; - - texture.Create(gl_target); - - // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0) - // alternatives. This signals a bug on those functions. - const auto width = static_cast<GLsizei>(params.MipWidth(0)); - const auto height = static_cast<GLsizei>(params.MipHeight(0)); - memory_size = params.MemorySize(); - reinterpreted = false; - - const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type); - gl_internal_format = format_tuple.internal_format; - - switch (params.target) { - case SurfaceTarget::Texture1D: - glTextureStorage1D(texture.handle, params.max_mip_level, gl_internal_format, width); - break; - case SurfaceTarget::TextureBuffer: - texture_buffer.Create(); - glNamedBufferStorage(texture_buffer.handle, - params.width * GetBytesPerPixel(params.pixel_format), nullptr, - GL_DYNAMIC_STORAGE_BIT); - glTextureBuffer(texture.handle, gl_internal_format, texture_buffer.handle); - break; - case SurfaceTarget::Texture2D: - case SurfaceTarget::TextureCubemap: - glTextureStorage2D(texture.handle, params.max_mip_level, gl_internal_format, width, height); - break; - case SurfaceTarget::Texture3D: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glTextureStorage3D(texture.handle, params.max_mip_level, gl_internal_format, width, height, - params.depth); - break; - default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", - static_cast<u32>(params.target)); - UNREACHABLE(); - glTextureStorage2D(texture.handle, params.max_mip_level, gl_internal_format, width, height); - } - - if (params.target != SurfaceTarget::TextureBuffer) { - ApplyTextureDefaults(texture.handle, params.max_mip_level); - } - - OpenGL::LabelGLObject(GL_TEXTURE, texture.handle, params.gpu_addr, params.IdentityString()); -} - -MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 192, 64)); -void CachedSurface::LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) { - MICROPROFILE_SCOPE(OpenGL_SurfaceLoad); - auto& gl_buffer = res_cache_tmp_mem.gl_buffer; - if (gl_buffer.size() < params.max_mip_level) - gl_buffer.resize(params.max_mip_level); - for (u32 i = 0; i < params.max_mip_level; i++) - gl_buffer[i].resize(params.GetMipmapSizeGL(i)); - if (params.is_tiled) { - ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", - params.block_width, static_cast<u32>(params.target)); - for (u32 i = 0; i < params.max_mip_level; i++) - SwizzleFunc(MortonSwizzleMode::MortonToLinear, params, gl_buffer[i], i); - } else { - const u32 bpp = params.GetFormatBpp() / 8; - const u32 copy_size = (params.width * bpp + GetDefaultBlockWidth(params.pixel_format) - 1) / - GetDefaultBlockWidth(params.pixel_format); - if (params.pitch == copy_size) { - std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl); - } else { - const u32 height = (params.height + GetDefaultBlockHeight(params.pixel_format) - 1) / - GetDefaultBlockHeight(params.pixel_format); - const u8* start{params.host_ptr}; - u8* write_to = gl_buffer[0].data(); - for (u32 h = height; h > 0; h--) { - std::memcpy(write_to, start, copy_size); - start += params.pitch; - write_to += copy_size; - } - } - } - for (u32 i = 0; i < params.max_mip_level; i++) { - const u32 width = params.MipWidth(i); - const u32 height = params.MipHeight(i); - const u32 depth = params.MipDepth(i); - if (VideoCore::Surface::IsPixelFormatASTC(params.pixel_format)) { - // Reserve size for RGBA8 conversion - constexpr std::size_t rgba_bpp = 4; - gl_buffer[i].resize(std::max(gl_buffer[i].size(), width * height * depth * rgba_bpp)); - } - Tegra::Texture::ConvertFromGuestToHost(gl_buffer[i].data(), params.pixel_format, width, - height, depth, true, true); - } -} - -MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); -void CachedSurface::FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem) { - MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); - - ASSERT_MSG(!IsPixelFormatASTC(params.pixel_format), "Unimplemented"); - - auto& gl_buffer = res_cache_tmp_mem.gl_buffer; - // OpenGL temporary buffer needs to be big enough to store raw texture size - gl_buffer[0].resize(GetSizeInBytes()); - - const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); - const u32 align = std::clamp(params.RowAlign(0), 1U, 8U); - glPixelStorei(GL_PACK_ALIGNMENT, align); - glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width)); - ASSERT(!tuple.compressed); - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, - static_cast<GLsizei>(gl_buffer[0].size()), gl_buffer[0].data()); - glPixelStorei(GL_PACK_ROW_LENGTH, 0); - Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width, - params.height, params.depth, true, true); - if (params.is_tiled) { - ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", - params.block_width, static_cast<u32>(params.target)); - - SwizzleFunc(MortonSwizzleMode::LinearToMorton, params, gl_buffer[0], 0); - } else { - const u32 bpp = params.GetFormatBpp() / 8; - const u32 copy_size = params.width * bpp; - if (params.pitch == copy_size) { - std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes()); - } else { - u8* start{params.host_ptr}; - const u8* read_to = gl_buffer[0].data(); - for (u32 h = params.height; h > 0; h--) { - std::memcpy(start, read_to, copy_size); - start += params.pitch; - read_to += copy_size; - } - } - } -} - -void CachedSurface::UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map, - GLuint read_fb_handle, GLuint draw_fb_handle) { - const auto& rect{params.GetRect(mip_map)}; - - auto& gl_buffer = res_cache_tmp_mem.gl_buffer; - - // Load data from memory to the surface - const auto x0 = static_cast<GLint>(rect.left); - const auto y0 = static_cast<GLint>(rect.bottom); - auto buffer_offset = - static_cast<std::size_t>(static_cast<std::size_t>(y0) * params.MipWidth(mip_map) + - static_cast<std::size_t>(x0)) * - GetBytesPerPixel(params.pixel_format); - - const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); - - const u32 align = std::clamp(params.RowAlign(mip_map), 1U, 8U); - glPixelStorei(GL_UNPACK_ALIGNMENT, align); - glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.MipWidth(mip_map))); - - const auto image_size = static_cast<GLsizei>(params.GetMipmapSizeGL(mip_map, false)); - if (tuple.compressed) { - switch (params.target) { - case SurfaceTarget::Texture2D: - glCompressedTextureSubImage2D( - texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)), - static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format, image_size, - &gl_buffer[mip_map][buffer_offset]); - break; - case SurfaceTarget::Texture3D: - glCompressedTextureSubImage3D( - texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)), - static_cast<GLsizei>(params.MipHeight(mip_map)), - static_cast<GLsizei>(params.MipDepth(mip_map)), tuple.internal_format, image_size, - &gl_buffer[mip_map][buffer_offset]); - break; - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glCompressedTextureSubImage3D( - texture.handle, mip_map, 0, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)), - static_cast<GLsizei>(params.MipHeight(mip_map)), static_cast<GLsizei>(params.depth), - tuple.internal_format, image_size, &gl_buffer[mip_map][buffer_offset]); - break; - case SurfaceTarget::TextureCubemap: { - const auto layer_size = static_cast<GLsizei>(params.LayerSizeGL(mip_map)); - for (std::size_t face = 0; face < params.depth; ++face) { - glCompressedTextureSubImage3D( - texture.handle, mip_map, 0, 0, static_cast<GLint>(face), - static_cast<GLsizei>(params.MipWidth(mip_map)), - static_cast<GLsizei>(params.MipHeight(mip_map)), 1, tuple.internal_format, - layer_size, &gl_buffer[mip_map][buffer_offset]); - buffer_offset += layer_size; - } - break; - } - default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", - static_cast<u32>(params.target)); - UNREACHABLE(); - glCompressedTextureSubImage2D( - texture.handle, mip_map, 0, 0, static_cast<GLsizei>(params.MipWidth(mip_map)), - static_cast<GLsizei>(params.MipHeight(mip_map)), tuple.internal_format, - static_cast<GLsizei>(params.size_in_bytes_gl), &gl_buffer[mip_map][buffer_offset]); - } - } else { - switch (params.target) { - case SurfaceTarget::Texture1D: - glTextureSubImage1D(texture.handle, mip_map, x0, static_cast<GLsizei>(rect.GetWidth()), - tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]); - break; - case SurfaceTarget::TextureBuffer: - ASSERT(mip_map == 0); - glNamedBufferSubData(texture_buffer.handle, x0, - static_cast<GLsizeiptr>(rect.GetWidth()) * - GetBytesPerPixel(params.pixel_format), - &gl_buffer[mip_map][buffer_offset]); - break; - case SurfaceTarget::Texture2D: - glTextureSubImage2D(texture.handle, mip_map, x0, y0, - static_cast<GLsizei>(rect.GetWidth()), - static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, - &gl_buffer[mip_map][buffer_offset]); - break; - case SurfaceTarget::Texture3D: - glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0, - static_cast<GLsizei>(rect.GetWidth()), - static_cast<GLsizei>(rect.GetHeight()), params.MipDepth(mip_map), - tuple.format, tuple.type, &gl_buffer[mip_map][buffer_offset]); - break; - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - glTextureSubImage3D(texture.handle, mip_map, x0, y0, 0, - static_cast<GLsizei>(rect.GetWidth()), - static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format, - tuple.type, &gl_buffer[mip_map][buffer_offset]); - break; - case SurfaceTarget::TextureCubemap: { - for (std::size_t face = 0; face < params.depth; ++face) { - glTextureSubImage3D(texture.handle, mip_map, x0, y0, static_cast<GLint>(face), - static_cast<GLsizei>(rect.GetWidth()), - static_cast<GLsizei>(rect.GetHeight()), 1, tuple.format, - tuple.type, &gl_buffer[mip_map][buffer_offset]); - buffer_offset += params.LayerSizeGL(mip_map); - } - break; - } - default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", - static_cast<u32>(params.target)); - UNREACHABLE(); - glTextureSubImage2D(texture.handle, mip_map, x0, y0, - static_cast<GLsizei>(rect.GetWidth()), - static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type, - &gl_buffer[mip_map][buffer_offset]); - } - } - - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); -} - -void CachedSurface::EnsureTextureDiscrepantView() { - if (discrepant_view.handle != 0) - return; - - const GLenum target{GetArrayDiscrepantTarget(params.target)}; - ASSERT(target != GL_NONE); - - const GLuint num_layers{target == GL_TEXTURE_CUBE_MAP_ARRAY ? 6u : 1u}; - constexpr GLuint min_layer = 0; - constexpr GLuint min_level = 0; - - glGenTextures(1, &discrepant_view.handle); - glTextureView(discrepant_view.handle, target, texture.handle, gl_internal_format, min_level, - params.max_mip_level, min_layer, num_layers); - ApplyTextureDefaults(discrepant_view.handle, params.max_mip_level); - glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, - reinterpret_cast<const GLint*>(swizzle.data())); -} - -MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64)); -void CachedSurface::UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, - GLuint read_fb_handle, GLuint draw_fb_handle) { - MICROPROFILE_SCOPE(OpenGL_TextureUL); - - for (u32 i = 0; i < params.max_mip_level; i++) - UploadGLMipmapTexture(res_cache_tmp_mem, i, read_fb_handle, draw_fb_handle); -} - -void CachedSurface::UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x, - Tegra::Texture::SwizzleSource swizzle_y, - Tegra::Texture::SwizzleSource swizzle_z, - Tegra::Texture::SwizzleSource swizzle_w) { - if (params.target == SurfaceTarget::TextureBuffer) { - return; - } - const GLenum new_x = MaxwellToGL::SwizzleSource(swizzle_x); - const GLenum new_y = MaxwellToGL::SwizzleSource(swizzle_y); - const GLenum new_z = MaxwellToGL::SwizzleSource(swizzle_z); - const GLenum new_w = MaxwellToGL::SwizzleSource(swizzle_w); - if (swizzle[0] == new_x && swizzle[1] == new_y && swizzle[2] == new_z && swizzle[3] == new_w) { - return; - } - swizzle = {new_x, new_y, new_z, new_w}; - const auto swizzle_data = reinterpret_cast<const GLint*>(swizzle.data()); - glTextureParameteriv(texture.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); - if (discrepant_view.handle != 0) { - glTextureParameteriv(discrepant_view.handle, GL_TEXTURE_SWIZZLE_RGBA, swizzle_data); - } -} - -RasterizerCacheOpenGL::RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer) - : RasterizerCache{rasterizer} { - read_framebuffer.Create(); - draw_framebuffer.Create(); - copy_pbo.Create(); -} - -Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, - const GLShader::SamplerEntry& entry) { - return GetSurface(SurfaceParams::CreateForTexture(config, entry)); -} - -Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) { - auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; - const auto& regs{gpu.regs}; - - if (!gpu.dirty_flags.zeta_buffer) { - return last_depth_buffer; - } - gpu.dirty_flags.zeta_buffer = false; - - if (!regs.zeta.Address() || !regs.zeta_enable) { - return last_depth_buffer = {}; - } - - SurfaceParams depth_params{SurfaceParams::CreateForDepthBuffer( - regs.zeta_width, regs.zeta_height, regs.zeta.Address(), regs.zeta.format, - regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, - regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; - - return last_depth_buffer = GetSurface(depth_params, preserve_contents); -} - -Surface RasterizerCacheOpenGL::GetColorBufferSurface(std::size_t index, bool preserve_contents) { - auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; - const auto& regs{gpu.regs}; - - if (!gpu.dirty_flags.color_buffer[index]) { - return current_color_buffers[index]; - } - gpu.dirty_flags.color_buffer.reset(index); - - ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); - - if (index >= regs.rt_control.count) { - return current_color_buffers[index] = {}; - } - - if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { - return current_color_buffers[index] = {}; - } - - const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)}; - - return current_color_buffers[index] = GetSurface(color_params, preserve_contents); -} - -void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) { - surface->LoadGLBuffer(temporal_memory); - surface->UploadGLTexture(temporal_memory, read_framebuffer.handle, draw_framebuffer.handle); - surface->MarkAsModified(false, *this); - surface->MarkForReload(false); -} - -Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool preserve_contents) { - if (!params.IsValid()) { - return {}; - } - - // Look up surface in the cache based on address - Surface surface{TryGet(params.host_ptr)}; - if (surface) { - if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { - // Use the cached surface as-is unless it's not synced with memory - if (surface->MustReload()) - LoadSurface(surface); - return surface; - } else if (preserve_contents) { - // If surface parameters changed and we care about keeping the previous data, recreate - // the surface from the old one - Surface new_surface{RecreateSurface(surface, params)}; - Unregister(surface); - Register(new_surface); - if (new_surface->IsUploaded()) { - RegisterReinterpretSurface(new_surface); - } - return new_surface; - } else { - // Delete the old surface before creating a new one to prevent collisions. - Unregister(surface); - } - } - - // No cached surface found - get a new one - surface = GetUncachedSurface(params); - Register(surface); - - // Only load surface from memory if we care about the contents - if (preserve_contents) { - LoadSurface(surface); - } - - return surface; -} - -Surface RasterizerCacheOpenGL::GetUncachedSurface(const SurfaceParams& params) { - Surface surface{TryGetReservedSurface(params)}; - if (!surface) { - // No reserved surface available, create a new one and reserve it - surface = std::make_shared<CachedSurface>(params); - ReserveSurface(surface); - } - return surface; -} - -void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface, - const Surface& dst_surface) { - const auto& init_params{src_surface->GetSurfaceParams()}; - const auto& dst_params{dst_surface->GetSurfaceParams()}; - auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; - GPUVAddr address{init_params.gpu_addr}; - const std::size_t layer_size{dst_params.LayerMemorySize()}; - for (u32 layer = 0; layer < dst_params.depth; layer++) { - for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) { - const GPUVAddr sub_address{address + dst_params.GetMipmapLevelOffset(mipmap)}; - const Surface& copy{TryGet(memory_manager.GetPointer(sub_address))}; - if (!copy) { - continue; - } - const auto& src_params{copy->GetSurfaceParams()}; - const u32 width{std::min(src_params.width, dst_params.MipWidth(mipmap))}; - const u32 height{std::min(src_params.height, dst_params.MipHeight(mipmap))}; - - glCopyImageSubData(copy->Texture().handle, SurfaceTargetToGL(src_params.target), 0, 0, - 0, 0, dst_surface->Texture().handle, - SurfaceTargetToGL(dst_params.target), mipmap, 0, 0, layer, width, - height, 1); - } - address += layer_size; - } - - dst_surface->MarkAsModified(true, *this); -} - -static bool BlitSurface(const Surface& src_surface, const Surface& dst_surface, - const Common::Rectangle<u32>& src_rect, - const Common::Rectangle<u32>& dst_rect, GLuint read_fb_handle, - GLuint draw_fb_handle, GLenum src_attachment = 0, GLenum dst_attachment = 0, - std::size_t cubemap_face = 0) { - - const auto& src_params{src_surface->GetSurfaceParams()}; - const auto& dst_params{dst_surface->GetSurfaceParams()}; - - OpenGLState prev_state{OpenGLState::GetCurState()}; - SCOPE_EXIT({ prev_state.Apply(); }); - - OpenGLState state; - state.draw.read_framebuffer = read_fb_handle; - state.draw.draw_framebuffer = draw_fb_handle; - state.Apply(); - - u32 buffers{}; - - if (src_params.type == SurfaceType::ColorTexture) { - switch (src_params.target) { - case SurfaceTarget::Texture2D: - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - GL_TEXTURE_2D, src_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - break; - case SurfaceTarget::TextureCubemap: - glFramebufferTexture2D( - GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), - src_surface->Texture().handle, 0); - glFramebufferTexture2D( - GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); - break; - case SurfaceTarget::Texture2DArray: - glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - src_surface->Texture().handle, 0, 0); - glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); - break; - case SurfaceTarget::Texture3D: - glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - SurfaceTargetToGL(src_params.target), - src_surface->Texture().handle, 0, 0); - glFramebufferTexture3D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - SurfaceTargetToGL(src_params.target), 0, 0, 0); - break; - default: - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - GL_TEXTURE_2D, src_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - break; - } - - switch (dst_params.target) { - case SurfaceTarget::Texture2D: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - GL_TEXTURE_2D, dst_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - break; - case SurfaceTarget::TextureCubemap: - glFramebufferTexture2D( - GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), - dst_surface->Texture().handle, 0); - glFramebufferTexture2D( - GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - static_cast<GLenum>(GL_TEXTURE_CUBE_MAP_POSITIVE_X + cubemap_face), 0, 0); - break; - case SurfaceTarget::Texture2DArray: - glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - dst_surface->Texture().handle, 0, 0); - glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, 0, 0, 0); - break; - - case SurfaceTarget::Texture3D: - glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - SurfaceTargetToGL(dst_params.target), - dst_surface->Texture().handle, 0, 0); - glFramebufferTexture3D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - SurfaceTargetToGL(dst_params.target), 0, 0, 0); - break; - default: - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - GL_TEXTURE_2D, dst_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - break; - } - - buffers = GL_COLOR_BUFFER_BIT; - } else if (src_params.type == SurfaceType::Depth) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - src_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - dst_surface->Texture().handle, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); - - buffers = GL_DEPTH_BUFFER_BIT; - } else if (src_params.type == SurfaceType::DepthStencil) { - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + src_attachment, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - src_surface->Texture().handle, 0); - - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + dst_attachment, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, - dst_surface->Texture().handle, 0); - - buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; - } - - glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, - dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, - buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); - - return true; -} - -void RasterizerCacheOpenGL::FermiCopySurface( - const Tegra::Engines::Fermi2D::Regs::Surface& src_config, - const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, - const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& dst_rect) { - - const auto& src_params = SurfaceParams::CreateForFermiCopySurface(src_config); - const auto& dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config); - - ASSERT(src_params.pixel_format == dst_params.pixel_format); - ASSERT(src_params.block_height == dst_params.block_height); - ASSERT(src_params.is_tiled == dst_params.is_tiled); - ASSERT(src_params.depth == dst_params.depth); - ASSERT(src_params.target == dst_params.target); - ASSERT(src_params.rt.index == dst_params.rt.index); - - auto src_surface = GetSurface(src_params, true); - auto dst_surface = GetSurface(dst_params, true); - - BlitSurface(src_surface, dst_surface, src_rect, dst_rect, read_framebuffer.handle, - draw_framebuffer.handle); - - dst_surface->MarkAsModified(true, *this); -} - -void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface, - const Surface& dst_surface) { - const auto& src_params{src_surface->GetSurfaceParams()}; - const auto& dst_params{dst_surface->GetSurfaceParams()}; - - // Flush enough memory for both the source and destination surface - FlushRegion(ToCacheAddr(src_params.host_ptr), - std::max(src_params.MemorySize(), dst_params.MemorySize())); - - LoadSurface(dst_surface); -} - -Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, - const SurfaceParams& new_params) { - // Verify surface is compatible for blitting - auto old_params{old_surface->GetSurfaceParams()}; - - // Get a new surface with the new parameters, and blit the previous surface to it - Surface new_surface{GetUncachedSurface(new_params)}; - - // With use_accurate_gpu_emulation enabled, do an accurate surface copy - if (Settings::values.use_accurate_gpu_emulation) { - AccurateCopySurface(old_surface, new_surface); - return new_surface; - } - - const bool old_compressed = - GetFormatTuple(old_params.pixel_format, old_params.component_type).compressed; - const bool new_compressed = - GetFormatTuple(new_params.pixel_format, new_params.component_type).compressed; - const bool compatible_formats = - GetFormatBpp(old_params.pixel_format) == GetFormatBpp(new_params.pixel_format) && - !(old_compressed || new_compressed); - // For compatible surfaces, we can just do fast glCopyImageSubData based copy - if (old_params.target == new_params.target && old_params.depth == new_params.depth && - old_params.depth == 1 && compatible_formats) { - FastCopySurface(old_surface, new_surface); - return new_surface; - } - - switch (new_params.target) { - case SurfaceTarget::Texture2D: - CopySurface(old_surface, new_surface, copy_pbo.handle); - break; - case SurfaceTarget::Texture3D: - AccurateCopySurface(old_surface, new_surface); - break; - case SurfaceTarget::TextureCubemap: - case SurfaceTarget::Texture2DArray: - case SurfaceTarget::TextureCubeArray: - if (compatible_formats) - FastLayeredCopySurface(old_surface, new_surface); - else { - AccurateCopySurface(old_surface, new_surface); - } - break; - default: - LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}", - static_cast<u32>(new_params.target)); - UNREACHABLE(); - } - - return new_surface; -} - -Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const { - return TryGet(host_ptr); -} - -void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { - const auto& surface_reserve_key{SurfaceReserveKey::Create(surface->GetSurfaceParams())}; - surface_reserve[surface_reserve_key] = surface; -} - -Surface RasterizerCacheOpenGL::TryGetReservedSurface(const SurfaceParams& params) { - const auto& surface_reserve_key{SurfaceReserveKey::Create(params)}; - auto search{surface_reserve.find(surface_reserve_key)}; - if (search != surface_reserve.end()) { - return search->second; - } - return {}; -} - -static std::optional<u32> TryFindBestMipMap(std::size_t memory, const SurfaceParams params, - u32 height) { - for (u32 i = 0; i < params.max_mip_level; i++) { - if (memory == params.GetMipmapSingleSize(i) && params.MipHeight(i) == height) { - return {i}; - } - } - return {}; -} - -static std::optional<u32> TryFindBestLayer(GPUVAddr addr, const SurfaceParams params, u32 mipmap) { - const std::size_t size{params.LayerMemorySize()}; - GPUVAddr start{params.gpu_addr + params.GetMipmapLevelOffset(mipmap)}; - for (u32 i = 0; i < params.depth; i++) { - if (start == addr) { - return {i}; - } - start += size; - } - return {}; -} - -static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surface render_surface, - const Surface blitted_surface) { - const auto& dst_params = blitted_surface->GetSurfaceParams(); - const auto& src_params = render_surface->GetSurfaceParams(); - const std::size_t src_memory_size = src_params.size_in_bytes; - const std::optional<u32> level = - TryFindBestMipMap(src_memory_size, dst_params, src_params.height); - if (level.has_value()) { - if (src_params.width == dst_params.MipWidthGobAligned(*level) && - src_params.height == dst_params.MipHeight(*level) && - src_params.block_height >= dst_params.MipBlockHeight(*level)) { - const std::optional<u32> slot = - TryFindBestLayer(render_surface->GetSurfaceParams().gpu_addr, dst_params, *level); - if (slot.has_value()) { - glCopyImageSubData(render_surface->Texture().handle, - SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, - blitted_surface->Texture().handle, - SurfaceTargetToGL(dst_params.target), *level, 0, 0, *slot, - dst_params.MipWidth(*level), dst_params.MipHeight(*level), 1); - blitted_surface->MarkAsModified(true, cache); - return true; - } - } - } - return false; -} - -static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) { - const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize(); - const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize(); - if (bound2 > bound1) - return true; - const auto& dst_params = blitted_surface->GetSurfaceParams(); - const auto& src_params = render_surface->GetSurfaceParams(); - return (dst_params.component_type != src_params.component_type); -} - -static bool IsReinterpretInvalidSecond(const Surface render_surface, - const Surface blitted_surface) { - const auto& dst_params = blitted_surface->GetSurfaceParams(); - const auto& src_params = render_surface->GetSurfaceParams(); - return (dst_params.height > src_params.height && dst_params.width > src_params.width); -} - -bool RasterizerCacheOpenGL::PartialReinterpretSurface(Surface triggering_surface, - Surface intersect) { - if (IsReinterpretInvalid(triggering_surface, intersect)) { - Unregister(intersect); - return false; - } - if (!LayerFitReinterpretSurface(*this, triggering_surface, intersect)) { - if (IsReinterpretInvalidSecond(triggering_surface, intersect)) { - Unregister(intersect); - return false; - } - FlushObject(intersect); - FlushObject(triggering_surface); - intersect->MarkForReload(true); - } - return true; -} - -void RasterizerCacheOpenGL::SignalPreDrawCall() { - if (texception && GLAD_GL_ARB_texture_barrier) { - glTextureBarrier(); - } - texception = false; -} - -void RasterizerCacheOpenGL::SignalPostDrawCall() { - for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) { - if (current_color_buffers[i] != nullptr) { - Surface intersect = - CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr()); - if (intersect != nullptr) { - PartialReinterpretSurface(current_color_buffers[i], intersect); - texception = true; - } - } - } -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h deleted file mode 100644 index bbab79575..000000000 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h +++ /dev/null @@ -1,575 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <array> -#include <memory> -#include <string> -#include <tuple> -#include <vector> - -#include "common/alignment.h" -#include "common/bit_util.h" -#include "common/common_types.h" -#include "common/hash.h" -#include "common/math_util.h" -#include "video_core/engines/fermi_2d.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/rasterizer_cache.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_gen.h" -#include "video_core/surface.h" -#include "video_core/textures/decoders.h" -#include "video_core/textures/texture.h" - -namespace OpenGL { - -class CachedSurface; -using Surface = std::shared_ptr<CachedSurface>; -using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>; - -using SurfaceTarget = VideoCore::Surface::SurfaceTarget; -using SurfaceType = VideoCore::Surface::SurfaceType; -using PixelFormat = VideoCore::Surface::PixelFormat; -using ComponentType = VideoCore::Surface::ComponentType; -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -struct SurfaceParams { - enum class SurfaceClass { - Uploaded, - RenderTarget, - DepthBuffer, - Copy, - }; - - static std::string SurfaceTargetName(SurfaceTarget target) { - switch (target) { - case SurfaceTarget::Texture1D: - return "Texture1D"; - case SurfaceTarget::Texture2D: - return "Texture2D"; - case SurfaceTarget::Texture3D: - return "Texture3D"; - case SurfaceTarget::Texture1DArray: - return "Texture1DArray"; - case SurfaceTarget::Texture2DArray: - return "Texture2DArray"; - case SurfaceTarget::TextureCubemap: - return "TextureCubemap"; - case SurfaceTarget::TextureCubeArray: - return "TextureCubeArray"; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target)); - UNREACHABLE(); - return fmt::format("TextureUnknown({})", static_cast<u32>(target)); - } - } - - u32 GetFormatBpp() const { - return VideoCore::Surface::GetFormatBpp(pixel_format); - } - - /// Returns the rectangle corresponding to this surface - Common::Rectangle<u32> GetRect(u32 mip_level = 0) const; - - /// Returns the total size of this surface in bytes, adjusted for compression - std::size_t SizeInBytesRaw(bool ignore_tiled = false) const { - const u32 compression_factor{GetCompressionFactor(pixel_format)}; - const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)}; - const size_t uncompressed_size{ - Tegra::Texture::CalculateSize((ignore_tiled ? false : is_tiled), bytes_per_pixel, width, - height, depth, block_height, block_depth)}; - - // Divide by compression_factor^2, as height and width are factored by this - return uncompressed_size / (compression_factor * compression_factor); - } - - /// Returns the size of this surface as an OpenGL texture in bytes - std::size_t SizeInBytesGL() const { - return SizeInBytesRaw(true); - } - - /// Returns the size of this surface as a cube face in bytes - std::size_t SizeInBytesCubeFace() const { - return size_in_bytes / 6; - } - - /// Returns the size of this surface as an OpenGL cube face in bytes - std::size_t SizeInBytesCubeFaceGL() const { - return size_in_bytes_gl / 6; - } - - /// Returns the exact size of memory occupied by the texture in VRAM, including mipmaps. - std::size_t MemorySize() const { - std::size_t size = InnerMemorySize(false, is_layered); - if (is_layered) - return size * depth; - return size; - } - - /// Returns true if the parameters constitute a valid rasterizer surface. - bool IsValid() const { - return gpu_addr && host_ptr && height && width; - } - - /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including - /// mipmaps. - std::size_t LayerMemorySize() const { - return InnerMemorySize(false, true); - } - - /// Returns the size of a layer of this surface in OpenGL. - std::size_t LayerSizeGL(u32 mip_level) const { - return InnerMipmapMemorySize(mip_level, true, is_layered, false); - } - - std::size_t GetMipmapSizeGL(u32 mip_level, bool ignore_compressed = true) const { - std::size_t size = InnerMipmapMemorySize(mip_level, true, is_layered, ignore_compressed); - if (is_layered) - return size * depth; - return size; - } - - std::size_t GetMipmapLevelOffset(u32 mip_level) const { - std::size_t offset = 0; - for (u32 i = 0; i < mip_level; i++) - offset += InnerMipmapMemorySize(i, false, is_layered); - return offset; - } - - std::size_t GetMipmapLevelOffsetGL(u32 mip_level) const { - std::size_t offset = 0; - for (u32 i = 0; i < mip_level; i++) - offset += InnerMipmapMemorySize(i, true, is_layered); - return offset; - } - - std::size_t GetMipmapSingleSize(u32 mip_level) const { - return InnerMipmapMemorySize(mip_level, false, is_layered); - } - - u32 MipWidth(u32 mip_level) const { - return std::max(1U, width >> mip_level); - } - - u32 MipWidthGobAligned(u32 mip_level) const { - return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp()); - } - - u32 MipHeight(u32 mip_level) const { - return std::max(1U, height >> mip_level); - } - - u32 MipDepth(u32 mip_level) const { - return is_layered ? depth : std::max(1U, depth >> mip_level); - } - - // Auto block resizing algorithm from: - // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c - u32 MipBlockHeight(u32 mip_level) const { - if (mip_level == 0) - return block_height; - u32 alt_height = MipHeight(mip_level); - u32 h = GetDefaultBlockHeight(pixel_format); - u32 blocks_in_y = (alt_height + h - 1) / h; - u32 bh = 16; - while (bh > 1 && blocks_in_y <= bh * 4) { - bh >>= 1; - } - return bh; - } - - u32 MipBlockDepth(u32 mip_level) const { - if (mip_level == 0) { - return block_depth; - } - - if (is_layered) { - return 1; - } - - const u32 mip_depth = MipDepth(mip_level); - u32 bd = 32; - while (bd > 1 && mip_depth * 2 <= bd) { - bd >>= 1; - } - - if (bd == 32) { - const u32 bh = MipBlockHeight(mip_level); - if (bh >= 4) { - return 16; - } - } - - return bd; - } - - u32 RowAlign(u32 mip_level) const { - const u32 m_width = MipWidth(mip_level); - const u32 bytes_per_pixel = GetBytesPerPixel(pixel_format); - const u32 l2 = Common::CountTrailingZeroes32(m_width * bytes_per_pixel); - return (1U << l2); - } - - /// Creates SurfaceParams from a texture configuration - static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config, - const GLShader::SamplerEntry& entry); - - /// Creates SurfaceParams from a framebuffer configuration - static SurfaceParams CreateForFramebuffer(std::size_t index); - - /// Creates SurfaceParams for a depth buffer configuration - static SurfaceParams CreateForDepthBuffer( - u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format, - u32 block_width, u32 block_height, u32 block_depth, - Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type); - - /// Creates SurfaceParams for a Fermi2D surface copy - static SurfaceParams CreateForFermiCopySurface( - const Tegra::Engines::Fermi2D::Regs::Surface& config); - - /// Checks if surfaces are compatible for caching - bool IsCompatibleSurface(const SurfaceParams& other) const { - if (std::tie(pixel_format, type, width, height, target, depth, is_tiled) == - std::tie(other.pixel_format, other.type, other.width, other.height, other.target, - other.depth, other.is_tiled)) { - if (!is_tiled) - return true; - return std::tie(block_height, block_depth, tile_width_spacing) == - std::tie(other.block_height, other.block_depth, other.tile_width_spacing); - } - return false; - } - - /// Initializes parameters for caching, should be called after everything has been initialized - void InitCacheParameters(GPUVAddr gpu_addr); - - std::string TargetName() const { - switch (target) { - case SurfaceTarget::Texture1D: - return "1D"; - case SurfaceTarget::TextureBuffer: - return "Buffer"; - case SurfaceTarget::Texture2D: - return "2D"; - case SurfaceTarget::Texture3D: - return "3D"; - case SurfaceTarget::Texture1DArray: - return "1DArray"; - case SurfaceTarget::Texture2DArray: - return "2DArray"; - case SurfaceTarget::TextureCubemap: - return "Cube"; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target)); - UNREACHABLE(); - return fmt::format("TUK({})", static_cast<u32>(target)); - } - } - - std::string ClassName() const { - switch (identity) { - case SurfaceClass::Uploaded: - return "UP"; - case SurfaceClass::RenderTarget: - return "RT"; - case SurfaceClass::DepthBuffer: - return "DB"; - case SurfaceClass::Copy: - return "CP"; - default: - LOG_CRITICAL(HW_GPU, "Unimplemented surface_class={}", static_cast<u32>(identity)); - UNREACHABLE(); - return fmt::format("CUK({})", static_cast<u32>(identity)); - } - } - - std::string IdentityString() const { - return ClassName() + '_' + TargetName() + '_' + (is_tiled ? 'T' : 'L'); - } - - bool is_tiled; - u32 block_width; - u32 block_height; - u32 block_depth; - u32 tile_width_spacing; - PixelFormat pixel_format; - ComponentType component_type; - SurfaceType type; - u32 width; - u32 height; - u32 depth; - u32 unaligned_height; - u32 pitch; - SurfaceTarget target; - SurfaceClass identity; - u32 max_mip_level; - bool is_layered; - bool is_array; - bool srgb_conversion; - // Parameters used for caching - u8* host_ptr; - GPUVAddr gpu_addr; - std::size_t size_in_bytes; - std::size_t size_in_bytes_gl; - - // Render target specific parameters, not used in caching - struct { - u32 index; - u32 array_mode; - u32 volume; - u32 layer_stride; - u32 base_layer; - } rt; - -private: - std::size_t InnerMipmapMemorySize(u32 mip_level, bool force_gl = false, bool layer_only = false, - bool uncompressed = false) const; - std::size_t InnerMemorySize(bool force_gl = false, bool layer_only = false, - bool uncompressed = false) const; -}; - -}; // namespace OpenGL - -/// Hashable variation of SurfaceParams, used for a key in the surface cache -struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> { - static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) { - SurfaceReserveKey res; - res.state = params; - res.state.identity = {}; // Ignore the origin of the texture - res.state.gpu_addr = {}; // Ignore GPU vaddr in caching - res.state.rt = {}; // Ignore rt config in caching - return res; - } -}; -namespace std { -template <> -struct hash<SurfaceReserveKey> { - std::size_t operator()(const SurfaceReserveKey& k) const { - return k.Hash(); - } -}; -} // namespace std - -namespace OpenGL { - -class RasterizerOpenGL; - -// This is used to store temporary big buffers, -// instead of creating/destroying all the time -struct RasterizerTemporaryMemory { - std::vector<std::vector<u8>> gl_buffer; -}; - -class CachedSurface final : public RasterizerCacheObject { -public: - explicit CachedSurface(const SurfaceParams& params); - - VAddr GetCpuAddr() const override { - return cpu_addr; - } - - std::size_t GetSizeInBytes() const override { - return cached_size_in_bytes; - } - - std::size_t GetMemorySize() const { - return memory_size; - } - - const OGLTexture& Texture() const { - return texture; - } - - const OGLTexture& Texture(bool as_array) { - if (params.is_array == as_array) { - return texture; - } else { - EnsureTextureDiscrepantView(); - return discrepant_view; - } - } - - GLenum Target() const { - return gl_target; - } - - const SurfaceParams& GetSurfaceParams() const { - return params; - } - - // Read/Write data in Switch memory to/from gl_buffer - void LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem); - void FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem); - - // Upload data in gl_buffer to this surface's texture - void UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, GLuint read_fb_handle, - GLuint draw_fb_handle); - - void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x, - Tegra::Texture::SwizzleSource swizzle_y, - Tegra::Texture::SwizzleSource swizzle_z, - Tegra::Texture::SwizzleSource swizzle_w); - - void MarkReinterpreted() { - reinterpreted = true; - } - - bool IsReinterpreted() const { - return reinterpreted; - } - - void MarkForReload(bool reload) { - must_reload = reload; - } - - bool MustReload() const { - return must_reload; - } - - bool IsUploaded() const { - return params.identity == SurfaceParams::SurfaceClass::Uploaded; - } - -private: - void UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map, - GLuint read_fb_handle, GLuint draw_fb_handle); - - void EnsureTextureDiscrepantView(); - - OGLTexture texture; - OGLTexture discrepant_view; - OGLBuffer texture_buffer; - SurfaceParams params{}; - GLenum gl_target{}; - GLenum gl_internal_format{}; - std::size_t cached_size_in_bytes{}; - std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA}; - std::size_t memory_size; - bool reinterpreted = false; - bool must_reload = false; - VAddr cpu_addr{}; -}; - -class RasterizerCacheOpenGL final : public RasterizerCache<Surface> { -public: - explicit RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer); - - /// Get a surface based on the texture configuration - Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config, - const GLShader::SamplerEntry& entry); - - /// Get the depth surface based on the framebuffer configuration - Surface GetDepthBufferSurface(bool preserve_contents); - - /// Get the color surface based on the framebuffer configuration and the specified render target - Surface GetColorBufferSurface(std::size_t index, bool preserve_contents); - - /// Tries to find a framebuffer using on the provided CPU address - Surface TryFindFramebufferSurface(const u8* host_ptr) const; - - /// Copies the contents of one surface to another - void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, - const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, - const Common::Rectangle<u32>& src_rect, - const Common::Rectangle<u32>& dst_rect); - - void SignalPreDrawCall(); - void SignalPostDrawCall(); - -protected: - void FlushObjectInner(const Surface& object) override { - object->FlushGLBuffer(temporal_memory); - } - -private: - void LoadSurface(const Surface& surface); - Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true); - - /// Gets an uncached surface, creating it if need be - Surface GetUncachedSurface(const SurfaceParams& params); - - /// Recreates a surface with new parameters - Surface RecreateSurface(const Surface& old_surface, const SurfaceParams& new_params); - - /// Reserves a unique surface that can be reused later - void ReserveSurface(const Surface& surface); - - /// Tries to get a reserved surface for the specified parameters - Surface TryGetReservedSurface(const SurfaceParams& params); - - // Partialy reinterpret a surface based on a triggering_surface that collides with it. - // returns true if the reinterpret was successful, false in case it was not. - bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect); - - /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data - void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface); - void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface); - void FastCopySurface(const Surface& src_surface, const Surface& dst_surface); - void CopySurface(const Surface& src_surface, const Surface& dst_surface, - const GLuint copy_pbo_handle, const GLenum src_attachment = 0, - const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0); - - /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have - /// previously been used. This is to prevent surfaces from being constantly created and - /// destroyed when used with different surface parameters. - std::unordered_map<SurfaceReserveKey, Surface> surface_reserve; - - OGLFramebuffer read_framebuffer; - OGLFramebuffer draw_framebuffer; - - bool texception = false; - - /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one - /// using the new format. - OGLBuffer copy_pbo; - - std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers; - std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers; - Surface last_depth_buffer; - - RasterizerTemporaryMemory temporal_memory; - - using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>; - using SurfaceInterval = typename SurfaceIntervalCache::interval_type; - - static auto GetReinterpretInterval(const Surface& object) { - return SurfaceInterval::right_open(object->GetCacheAddr() + 1, - object->GetCacheAddr() + object->GetMemorySize() - 1); - } - - // Reinterpreted surfaces are very fragil as the game may keep rendering into them. - SurfaceIntervalCache reinterpreted_surfaces; - - void RegisterReinterpretSurface(Surface reinterpret_surface) { - auto interval = GetReinterpretInterval(reinterpret_surface); - reinterpreted_surfaces.insert({interval, reinterpret_surface}); - reinterpret_surface->MarkReinterpreted(); - } - - Surface CollideOnReinterpretedSurface(CacheAddr addr) const { - const SurfaceInterval interval{addr}; - for (auto& pair : - boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) { - return pair.second; - } - return nullptr; - } - - void Register(const Surface& object) override { - RasterizerCache<Surface>::Register(object); - } - - /// Unregisters an object from the cache - void Unregister(const Surface& object) override { - if (object->IsReinterpreted()) { - auto interval = GetReinterpretInterval(object); - reinterpreted_surfaces.erase(interval); - } - RasterizerCache<Surface>::Unregister(object); - } -}; - -} // namespace OpenGL |