diff options
author | Rodrigo Locatti <reinuseslisp@airmail.cc> | 2021-03-09 06:47:51 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-09 06:47:51 +0100 |
commit | daf5c5060b4b2e4aa985fbfe9724eb99c51bbd71 (patch) | |
tree | aaa1cc96298e6e114cb72dfb517f7a000d2a467d /src/video_core/renderer_opengl | |
parent | Merge pull request #6021 from ReinUsesLisp/skip-cache-heuristic (diff) | |
parent | texture_cache: Blacklist BGRA8 copies and views on OpenGL (diff) | |
download | yuzu-daf5c5060b4b2e4aa985fbfe9724eb99c51bbd71.tar yuzu-daf5c5060b4b2e4aa985fbfe9724eb99c51bbd71.tar.gz yuzu-daf5c5060b4b2e4aa985fbfe9724eb99c51bbd71.tar.bz2 yuzu-daf5c5060b4b2e4aa985fbfe9724eb99c51bbd71.tar.lz yuzu-daf5c5060b4b2e4aa985fbfe9724eb99c51bbd71.tar.xz yuzu-daf5c5060b4b2e4aa985fbfe9724eb99c51bbd71.tar.zst yuzu-daf5c5060b4b2e4aa985fbfe9724eb99c51bbd71.zip |
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 20 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.h | 5 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/util_shaders.cpp | 76 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/util_shaders.h | 22 |
4 files changed, 121 insertions, 2 deletions
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 12434db67..e028677e9 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -96,7 +96,7 @@ constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{ {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT {GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM - {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM {GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT {GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT @@ -125,7 +125,7 @@ constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{ {GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM {GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM {GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM - {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM + {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB @@ -396,6 +396,17 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { } } +[[nodiscard]] bool IsPixelFormatBGR(PixelFormat format) { + switch (format) { + case PixelFormat::B5G6R5_UNORM: + case PixelFormat::B8G8R8A8_UNORM: + case PixelFormat::B8G8R8A8_SRGB: + return true; + default: + return false; + } +} + } // Anonymous namespace ImageBufferMap::~ImageBufferMap() { @@ -512,6 +523,9 @@ bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) { if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { return false; } + if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) { + return false; + } return true; } @@ -520,6 +534,8 @@ void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src, if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) { ASSERT(src.info.type == ImageType::e3D); util_shaders.CopyBC4(dst, src, copies); + } else if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) { + util_shaders.CopyBGR(dst, src, copies); } else { UNREACHABLE(); } diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index a6172f009..3fbaa102f 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -86,6 +86,11 @@ public: FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const; + bool HasNativeBgr() const noexcept { + // OpenGL does not have native support for the BGR internal format + return false; + } + bool HasBrokenTextureViewFormats() const noexcept { return has_broken_texture_view_formats; } diff --git a/src/video_core/renderer_opengl/util_shaders.cpp b/src/video_core/renderer_opengl/util_shaders.cpp index 31ec68505..2fe4799bc 100644 --- a/src/video_core/renderer_opengl/util_shaders.cpp +++ b/src/video_core/renderer_opengl/util_shaders.cpp @@ -14,6 +14,7 @@ #include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" #include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" #include "video_core/host_shaders/opengl_copy_bc4_comp.h" +#include "video_core/host_shaders/opengl_copy_bgra_comp.h" #include "video_core/host_shaders/pitch_unswizzle_comp.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h" @@ -48,6 +49,11 @@ OGLProgram MakeProgram(std::string_view source) { return program; } +size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) { + return static_cast<size_t>(copy.extent.width * copy.extent.height * + copy.src_subresource.num_layers); +} + } // Anonymous namespace UtilShaders::UtilShaders(ProgramManager& program_manager_) @@ -55,6 +61,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)), block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), + copy_bgra_program(MakeProgram(OPENGL_COPY_BGRA_COMP)), copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) { const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); swizzle_table_buffer.Create(); @@ -205,6 +212,43 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im program_manager.RestoreGuestCompute(); } +void UtilShaders::CopyBGR(Image& dst_image, Image& src_image, + std::span<const VideoCommon::ImageCopy> copies) { + static constexpr GLuint BINDING_INPUT_IMAGE = 0; + static constexpr GLuint BINDING_OUTPUT_IMAGE = 1; + static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0}; + const u32 bytes_per_block = BytesPerBlock(dst_image.info.format); + switch (bytes_per_block) { + case 2: + // BGR565 copy + for (const ImageCopy& copy : copies) { + ASSERT(copy.src_offset == zero_offset); + ASSERT(copy.dst_offset == zero_offset); + bgr_copy_pass.Execute(dst_image, src_image, copy); + } + break; + case 4: { + // BGRA8 copy + program_manager.BindHostCompute(copy_bgra_program.handle); + constexpr GLenum FORMAT = GL_RGBA8; + for (const ImageCopy& copy : copies) { + ASSERT(copy.src_offset == zero_offset); + ASSERT(copy.dst_offset == zero_offset); + glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(), + copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, FORMAT); + glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(), + copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, FORMAT); + glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth); + } + program_manager.RestoreGuestCompute(); + break; + } + default: + UNREACHABLE(); + break; + } +} + GLenum StoreFormat(u32 bytes_per_block) { switch (bytes_per_block) { case 1: @@ -222,4 +266,36 @@ GLenum StoreFormat(u32 bytes_per_block) { return GL_R8UI; } +void Bgr565CopyPass::Execute(const Image& dst_image, const Image& src_image, + const ImageCopy& copy) { + if (CopyBufferCreationNeeded(copy)) { + CreateNewCopyBuffer(copy, GL_TEXTURE_2D_ARRAY, GL_RGB565); + } + // Copy from source to PBO + glPixelStorei(GL_PACK_ALIGNMENT, 1); + glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width); + glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr16_pbo.handle); + glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, + copy.src_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, + static_cast<GLsizei>(bgr16_pbo_size), nullptr); + + // Copy from PBO to destination in reverse order + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + glPixelStorei(GL_UNPACK_ROW_LENGTH, copy.extent.width); + glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bgr16_pbo.handle); + glTextureSubImage3D(dst_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height, + copy.dst_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, + nullptr); +} + +bool Bgr565CopyPass::CopyBufferCreationNeeded(const ImageCopy& copy) { + return bgr16_pbo_size < NumPixelsInCopy(copy) * sizeof(u16); +} + +void Bgr565CopyPass::CreateNewCopyBuffer(const ImageCopy& copy, GLenum target, GLuint format) { + bgr16_pbo.Create(); + bgr16_pbo_size = NumPixelsInCopy(copy) * sizeof(u16); + glNamedBufferData(bgr16_pbo.handle, bgr16_pbo_size, nullptr, GL_STREAM_COPY); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/util_shaders.h b/src/video_core/renderer_opengl/util_shaders.h index 7b1d16b09..93b009743 100644 --- a/src/video_core/renderer_opengl/util_shaders.h +++ b/src/video_core/renderer_opengl/util_shaders.h @@ -19,6 +19,22 @@ class ProgramManager; struct ImageBufferMap; +class Bgr565CopyPass { +public: + Bgr565CopyPass() = default; + ~Bgr565CopyPass() = default; + + void Execute(const Image& dst_image, const Image& src_image, + const VideoCommon::ImageCopy& copy); + +private: + [[nodiscard]] bool CopyBufferCreationNeeded(const VideoCommon::ImageCopy& copy); + void CreateNewCopyBuffer(const VideoCommon::ImageCopy& copy, GLenum target, GLuint format); + + OGLBuffer bgr16_pbo; + size_t bgr16_pbo_size{}; +}; + class UtilShaders { public: explicit UtilShaders(ProgramManager& program_manager); @@ -36,6 +52,9 @@ public: void CopyBC4(Image& dst_image, Image& src_image, std::span<const VideoCommon::ImageCopy> copies); + void CopyBGR(Image& dst_image, Image& src_image, + std::span<const VideoCommon::ImageCopy> copies); + private: ProgramManager& program_manager; @@ -44,7 +63,10 @@ private: OGLProgram block_linear_unswizzle_2d_program; OGLProgram block_linear_unswizzle_3d_program; OGLProgram pitch_unswizzle_program; + OGLProgram copy_bgra_program; OGLProgram copy_bc4_program; + + Bgr565CopyPass bgr_copy_pass; }; GLenum StoreFormat(u32 bytes_per_block); |