From db3eb168cd6a55c6387e3bfb23fb2eca061a691b Mon Sep 17 00:00:00 2001 From: Liam Date: Thu, 25 Aug 2022 12:32:14 -0400 Subject: video_core: add option for pessimistic flushing --- src/video_core/buffer_cache/buffer_base.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/video_core') diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index 0b2bc67b1..f9a6472cf 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -12,6 +12,7 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/div_ceil.h" +#include "common/settings.h" #include "core/memory.h" namespace VideoCommon { @@ -219,7 +220,9 @@ public: NotifyRasterizer(word_index, untracked_words[word_index], cached_bits); untracked_words[word_index] |= cached_bits; cpu_words[word_index] |= cached_bits; - cached_words[word_index] = 0; + if (!Settings::values.use_pessimistic_flushes) { + cached_words[word_index] = 0; + } } } -- cgit v1.2.3 From 7e379207ec7164cf9020bb83a5e2297dc463835d Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 31 Aug 2022 08:37:00 -0400 Subject: (shader/pipeline)_cache: Raise shader/pipeline cache version Since the following commit: https://github.com/yuzu-emu/yuzu/commit/a83a5d2e4c8932df864dd4cea2b04d87a12c8760 , many games will refuse to boot unless the shader/pipeline cache has been invalidated. --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 1ad56d9e7..ddb70934c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -49,7 +49,7 @@ using VideoCommon::LoadPipelines; using VideoCommon::SerializePipeline; using Context = ShaderContext::Context; -constexpr u32 CACHE_VERSION = 5; +constexpr u32 CACHE_VERSION = 6; template auto MakeSpan(Container& container) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 3adad5af4..9708dc45e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -53,7 +53,7 @@ using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; -constexpr u32 CACHE_VERSION = 5; +constexpr u32 CACHE_VERSION = 6; template auto MakeSpan(Container& container) { -- cgit v1.2.3 From 95333654861b62167b9a8052ff3faaa931930d17 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 31 Aug 2022 08:51:47 -0400 Subject: style: General style changes to match with the rest of the codebase --- src/video_core/shader_environment.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 808d88eec..5f7625947 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -39,11 +39,8 @@ static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) { return Shader::TextureType::Color1D; case Tegra::Texture::TextureType::Texture2D: case Tegra::Texture::TextureType::Texture2DNoMipmap: - if (entry.normalized_coords) { - return Shader::TextureType::Color2D; - } else { - return Shader::TextureType::Color2DRect; - } + return entry.normalized_coords ? Shader::TextureType::Color2D + : Shader::TextureType::Color2DRect; case Tegra::Texture::TextureType::Texture3D: return Shader::TextureType::Color3D; case Tegra::Texture::TextureType::TextureCubemap: -- cgit v1.2.3 From 809126c94a0ed8e7964d5a550abf7b3731d00512 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Thu, 1 Sep 2022 21:29:22 -0400 Subject: astc: Enable parallel CPU astc decoding Given the issues with GPU accelerated ASTC decoding with NVIDIA's latest drivers, parallelize astc decoding on the CPU. Uses half the available threads in the system for astc decoding. --- src/video_core/textures/astc.cpp | 56 +++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 21 deletions(-) (limited to 'src/video_core') diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index e3f3d3c5d..b159494c5 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -13,7 +13,9 @@ #include +#include "common/alignment.h" #include "common/common_types.h" +#include "common/thread_worker.h" #include "video_core/textures/astc.h" class InputBitStream { @@ -1650,29 +1652,41 @@ static void DecompressBlock(std::span inBuf, const u32 blockWidth, void Decompress(std::span data, uint32_t width, uint32_t height, uint32_t depth, uint32_t block_width, uint32_t block_height, std::span output) { - u32 block_index = 0; - std::size_t depth_offset = 0; - for (u32 z = 0; z < depth; z++) { - for (u32 y = 0; y < height; y += block_height) { - for (u32 x = 0; x < width; x += block_width) { - const std::span blockPtr{data.subspan(block_index * 16, 16)}; - - // Blocks can be at most 12x12 - std::array uncompData; - DecompressBlock(blockPtr, block_width, block_height, uncompData); - - u32 decompWidth = std::min(block_width, width - x); - u32 decompHeight = std::min(block_height, height - y); - - const std::span outRow = output.subspan(depth_offset + (y * width + x) * 4); - for (u32 jj = 0; jj < decompHeight; jj++) { - std::memcpy(outRow.data() + jj * width * 4, - uncompData.data() + jj * block_width, decompWidth * 4); + const u32 rows = Common::DivideUp(height, block_height); + const u32 cols = Common::DivideUp(width, block_width); + + Common::ThreadWorker workers{std::max(std::thread::hardware_concurrency(), 2U) / 2, + "yuzu:ASTCDecompress"}; + + for (u32 z = 0; z < depth; ++z) { + const u32 depth_offset = z * height * width * 4; + for (u32 y_index = 0; y_index < rows; ++y_index) { + auto decompress_stride = [data, width, height, depth, block_width, block_height, output, + rows, cols, z, depth_offset, y_index] { + const u32 y = y_index * block_height; + for (u32 x_index = 0; x_index < cols; ++x_index) { + const u32 block_index = (z * rows * cols) + (y_index * cols) + x_index; + const u32 x = x_index * block_width; + + const std::span blockPtr{data.subspan(block_index * 16, 16)}; + + // Blocks can be at most 12x12 + std::array uncompData; + DecompressBlock(blockPtr, block_width, block_height, uncompData); + + u32 decompWidth = std::min(block_width, width - x); + u32 decompHeight = std::min(block_height, height - y); + + const std::span outRow = output.subspan(depth_offset + (y * width + x) * 4); + for (u32 h = 0; h < decompHeight; ++h) { + std::memcpy(outRow.data() + h * width * 4, + uncompData.data() + h * block_width, decompWidth * 4); + } } - ++block_index; - } + }; + workers.QueueWork(std::move(decompress_stride)); } - depth_offset += height * width * 4; + workers.WaitForRequests(); } } -- cgit v1.2.3