diff options
author | Feng Chen <VonChenPlus@gmail.com> | 2022-09-20 05:56:43 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-09-20 05:56:43 +0200 |
commit | c864cb57726e76e9dc4558036f3212168bec825d (patch) | |
tree | ca79c4397f40990488a7b5691e15c0fcfec507b6 /src/video_core | |
parent | video_core: Generate mipmap texture by drawing (diff) | |
parent | Merge pull request #8849 from Morph1984/parallel-astc (diff) | |
download | yuzu-c864cb57726e76e9dc4558036f3212168bec825d.tar yuzu-c864cb57726e76e9dc4558036f3212168bec825d.tar.gz yuzu-c864cb57726e76e9dc4558036f3212168bec825d.tar.bz2 yuzu-c864cb57726e76e9dc4558036f3212168bec825d.tar.lz yuzu-c864cb57726e76e9dc4558036f3212168bec825d.tar.xz yuzu-c864cb57726e76e9dc4558036f3212168bec825d.tar.zst yuzu-c864cb57726e76e9dc4558036f3212168bec825d.zip |
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/buffer_cache/buffer_base.h | 5 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 | ||||
-rw-r--r-- | src/video_core/shader_environment.cpp | 7 | ||||
-rw-r--r-- | src/video_core/textures/astc.cpp | 56 |
4 files changed, 42 insertions, 28 deletions
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index 0b2bc67b1..f9a6472cf 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -12,6 +12,7 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/div_ceil.h" +#include "common/settings.h" #include "core/memory.h" namespace VideoCommon { @@ -219,7 +220,9 @@ public: NotifyRasterizer<false>(word_index, untracked_words[word_index], cached_bits); untracked_words[word_index] |= cached_bits; cpu_words[word_index] |= cached_bits; - cached_words[word_index] = 0; + if (!Settings::values.use_pessimistic_flushes) { + cached_words[word_index] = 0; + } } } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 3adad5af4..9708dc45e 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -53,7 +53,7 @@ using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; -constexpr u32 CACHE_VERSION = 5; +constexpr u32 CACHE_VERSION = 6; template <typename Container> auto MakeSpan(Container& container) { diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp index 3ead06dd6..c903975fc 100644 --- a/src/video_core/shader_environment.cpp +++ b/src/video_core/shader_environment.cpp @@ -39,11 +39,8 @@ static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) { return Shader::TextureType::Color1D; case Tegra::Texture::TextureType::Texture2D: case Tegra::Texture::TextureType::Texture2DNoMipmap: - if (entry.normalized_coords) { - return Shader::TextureType::Color2D; - } else { - return Shader::TextureType::Color2DRect; - } + return entry.normalized_coords ? Shader::TextureType::Color2D + : Shader::TextureType::Color2DRect; case Tegra::Texture::TextureType::Texture3D: return Shader::TextureType::Color3D; case Tegra::Texture::TextureType::TextureCubemap: diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp index e3f3d3c5d..b159494c5 100644 --- a/src/video_core/textures/astc.cpp +++ b/src/video_core/textures/astc.cpp @@ -13,7 +13,9 @@ #include <boost/container/static_vector.hpp> +#include "common/alignment.h" #include "common/common_types.h" +#include "common/thread_worker.h" #include "video_core/textures/astc.h" class InputBitStream { @@ -1650,29 +1652,41 @@ static void DecompressBlock(std::span<const u8, 16> inBuf, const u32 blockWidth, void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height, uint32_t depth, uint32_t block_width, uint32_t block_height, std::span<uint8_t> output) { - u32 block_index = 0; - std::size_t depth_offset = 0; - for (u32 z = 0; z < depth; z++) { - for (u32 y = 0; y < height; y += block_height) { - for (u32 x = 0; x < width; x += block_width) { - const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)}; - - // Blocks can be at most 12x12 - std::array<u32, 12 * 12> uncompData; - DecompressBlock(blockPtr, block_width, block_height, uncompData); - - u32 decompWidth = std::min(block_width, width - x); - u32 decompHeight = std::min(block_height, height - y); - - const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4); - for (u32 jj = 0; jj < decompHeight; jj++) { - std::memcpy(outRow.data() + jj * width * 4, - uncompData.data() + jj * block_width, decompWidth * 4); + const u32 rows = Common::DivideUp(height, block_height); + const u32 cols = Common::DivideUp(width, block_width); + + Common::ThreadWorker workers{std::max(std::thread::hardware_concurrency(), 2U) / 2, + "yuzu:ASTCDecompress"}; + + for (u32 z = 0; z < depth; ++z) { + const u32 depth_offset = z * height * width * 4; + for (u32 y_index = 0; y_index < rows; ++y_index) { + auto decompress_stride = [data, width, height, depth, block_width, block_height, output, + rows, cols, z, depth_offset, y_index] { + const u32 y = y_index * block_height; + for (u32 x_index = 0; x_index < cols; ++x_index) { + const u32 block_index = (z * rows * cols) + (y_index * cols) + x_index; + const u32 x = x_index * block_width; + + const std::span<const u8, 16> blockPtr{data.subspan(block_index * 16, 16)}; + + // Blocks can be at most 12x12 + std::array<u32, 12 * 12> uncompData; + DecompressBlock(blockPtr, block_width, block_height, uncompData); + + u32 decompWidth = std::min(block_width, width - x); + u32 decompHeight = std::min(block_height, height - y); + + const std::span<u8> outRow = output.subspan(depth_offset + (y * width + x) * 4); + for (u32 h = 0; h < decompHeight; ++h) { + std::memcpy(outRow.data() + h * width * 4, + uncompData.data() + h * block_width, decompWidth * 4); + } } - ++block_index; - } + }; + workers.QueueWork(std::move(decompress_stride)); } - depth_offset += height * width * 4; + workers.WaitForRequests(); } } |