diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/CMakeLists.txt | 3 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 2 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 21 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 58 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_graphics_pipeline.h | 2 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 719 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_pipeline_cache.h | 30 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 | ||||
-rw-r--r-- | src/video_core/shader_cache.cpp | 233 | ||||
-rw-r--r-- | src/video_core/shader_cache.h | 198 | ||||
-rw-r--r-- | src/video_core/shader_environment.cpp | 453 | ||||
-rw-r--r-- | src/video_core/shader_environment.h | 198 |
12 files changed, 1095 insertions, 824 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3166a69dc..6e0e4b8f5 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -145,7 +145,10 @@ add_library(video_core STATIC renderer_vulkan/vk_texture_cache.h renderer_vulkan/vk_update_descriptor.cpp renderer_vulkan/vk_update_descriptor.h + shader_cache.cpp shader_cache.h + shader_environment.cpp + shader_environment.h shader_notify.cpp shader_notify.h surface.cpp diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 1f58f8791..2fdcbe4ba 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -217,7 +217,7 @@ private: TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; BufferCache buffer_cache; - ShaderCacheOpenGL shader_cache; + ShaderCache shader_cache; QueryCache query_cache; AccelerateDMA accelerate_dma; FenceManagerOpenGL fence_manager; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 4dd166156..c3e490b40 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -29,18 +29,13 @@ namespace OpenGL { -Shader::Shader() = default; - -Shader::~Shader() = default; - -ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, - Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_) - : ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_}, - maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {} - -ShaderCacheOpenGL::~ShaderCacheOpenGL() = default; +ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, + Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_) + : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, + emu_window{emu_window_}, gpu{gpu_}, device{device_} {} + +ShaderCache::~ShaderCache() = default; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index ad3d15a76..96520e17c 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -36,27 +36,59 @@ class RasterizerOpenGL; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -class Shader { +struct GraphicsProgramKey { + struct TransformFeedbackState { + struct Layout { + u32 stream; + u32 varying_count; + u32 stride; + }; + std::array<Layout, Maxwell::NumTransformFeedbackBuffers> layouts; + std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> varyings; + }; + + std::array<u64, 6> unique_hashes; + std::array<u8, Maxwell::NumRenderTargets> color_formats; + union { + u32 raw; + BitField<0, 1, u32> xfb_enabled; + BitField<1, 1, u32> early_z; + BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; + BitField<6, 2, u32> tessellation_primitive; + BitField<8, 2, u32> tessellation_spacing; + BitField<10, 1, u32> tessellation_clockwise; + }; + u32 padding; + TransformFeedbackState xfb_state; + + [[nodiscard]] size_t Size() const noexcept { + if (xfb_enabled != 0) { + return sizeof(GraphicsProgramKey); + } else { + return offsetof(GraphicsProgramKey, padding); + } + } +}; +static_assert(std::has_unique_object_representations_v<GraphicsProgramKey>); +static_assert(std::is_trivially_copyable_v<GraphicsProgramKey>); +static_assert(std::is_trivially_constructible_v<GraphicsProgramKey>); + +class GraphicsProgram { public: - explicit Shader(); - ~Shader(); +private: }; -class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> { +class ShaderCache : public VideoCommon::ShaderCache { public: - explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_, - Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, const Device& device_); - ~ShaderCacheOpenGL() override; + explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, + Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, const Device& device_); + ~ShaderCache(); private: Core::Frontend::EmuWindow& emu_window; Tegra::GPU& gpu; - Tegra::MemoryManager& gpu_memory; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::Engines::KeplerCompute& kepler_compute; const Device& device; }; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 85e21f611..e362d13c5 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -23,7 +23,7 @@ namespace Vulkan { struct GraphicsPipelineCacheKey { - std::array<u128, 6> unique_hashes; + std::array<u64, 6> unique_hashes; FixedPipelineState state; size_t Hash() const noexcept; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 9d9729022..0822862fe 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -11,7 +11,8 @@ #include "common/bit_cast.h" #include "common/cityhash.h" -#include "common/file_util.h" +#include "common/fs/fs.h" +#include "common/fs/path_util.h" #include "common/microprofile.h" #include "common/thread_worker.h" #include "core/core.h" @@ -36,6 +37,7 @@ #include "video_core/renderer_vulkan/vk_shader_util.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" #include "video_core/shader_notify.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -43,449 +45,19 @@ namespace Vulkan { MICROPROFILE_DECLARE(Vulkan_PipelineCache); -template <typename Container> -auto MakeSpan(Container& container) { - return std::span(container.data(), container.size()); -} - -static u64 MakeCbufKey(u32 index, u32 offset) { - return (static_cast<u64>(index) << 32) | offset; -} - -class GenericEnvironment : public Shader::Environment { -public: - explicit GenericEnvironment() = default; - explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, - u32 start_address_) - : gpu_memory{&gpu_memory_}, program_base{program_base_} { - start_address = start_address_; - } - - ~GenericEnvironment() override = default; - - u32 TextureBoundBuffer() const final { - return texture_bound; - } - - u32 LocalMemorySize() const final { - return local_memory_size; - } - - u32 SharedMemorySize() const final { - return shared_memory_size; - } - - std::array<u32, 3> WorkgroupSize() const final { - return workgroup_size; - } - - u64 ReadInstruction(u32 address) final { - read_lowest = std::min(read_lowest, address); - read_highest = std::max(read_highest, address); - - if (address >= cached_lowest && address < cached_highest) { - return code[(address - cached_lowest) / INST_SIZE]; - } - has_unbound_instructions = true; - return gpu_memory->Read<u64>(program_base + address); - } - - std::optional<u128> Analyze() { - const std::optional<u64> size{TryFindSize()}; - if (!size) { - return std::nullopt; - } - cached_lowest = start_address; - cached_highest = start_address + static_cast<u32>(*size); - return Common::CityHash128(reinterpret_cast<const char*>(code.data()), *size); - } - - void SetCachedSize(size_t size_bytes) { - cached_lowest = start_address; - cached_highest = start_address + static_cast<u32>(size_bytes); - code.resize(CachedSize()); - gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); - } - - [[nodiscard]] size_t CachedSize() const noexcept { - return cached_highest - cached_lowest + INST_SIZE; - } - - [[nodiscard]] size_t ReadSize() const noexcept { - return read_highest - read_lowest + INST_SIZE; - } - - [[nodiscard]] bool CanBeSerialized() const noexcept { - return !has_unbound_instructions; - } - - [[nodiscard]] u128 CalculateHash() const { - const size_t size{ReadSize()}; - const auto data{std::make_unique<char[]>(size)}; - gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); - return Common::CityHash128(data.get(), size); - } - - void Serialize(std::ofstream& file) const { - const u64 code_size{static_cast<u64>(CachedSize())}; - const u64 num_texture_types{static_cast<u64>(texture_types.size())}; - const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; - - file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) - .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types)) - .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values)) - .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size)) - .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) - .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) - .write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest)) - .write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest)) - .write(reinterpret_cast<const char*>(&stage), sizeof(stage)) - .write(reinterpret_cast<const char*>(code.data()), code_size); - for (const auto [key, type] : texture_types) { - file.write(reinterpret_cast<const char*>(&key), sizeof(key)) - .write(reinterpret_cast<const char*>(&type), sizeof(type)); - } - for (const auto [key, type] : cbuf_values) { - file.write(reinterpret_cast<const char*>(&key), sizeof(key)) - .write(reinterpret_cast<const char*>(&type), sizeof(type)); - } - if (stage == Shader::Stage::Compute) { - file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size)) - .write(reinterpret_cast<const char*>(&shared_memory_size), - sizeof(shared_memory_size)); - } else { - file.write(reinterpret_cast<const char*>(&sph), sizeof(sph)); - } - } - -protected: - static constexpr size_t INST_SIZE = sizeof(u64); - - std::optional<u64> TryFindSize() { - constexpr size_t BLOCK_SIZE = 0x1000; - constexpr size_t MAXIMUM_SIZE = 0x100000; - - constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; - constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; - - GPUVAddr guest_addr{program_base + start_address}; - size_t offset{0}; - size_t size{BLOCK_SIZE}; - while (size <= MAXIMUM_SIZE) { - code.resize(size / INST_SIZE); - u64* const data = code.data() + offset / INST_SIZE; - gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); - for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { - const u64 inst = data[index / INST_SIZE]; - if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { - return offset + index; - } - } - guest_addr += BLOCK_SIZE; - size += BLOCK_SIZE; - offset += BLOCK_SIZE; - } - return std::nullopt; - } - - Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, - u32 raw) { - const TextureHandle handle{raw, via_header_index}; - const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; - Tegra::Texture::TICEntry entry; - gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); - - const Shader::TextureType result{[&] { - switch (entry.texture_type) { - case Tegra::Texture::TextureType::Texture1D: - return Shader::TextureType::Color1D; - case Tegra::Texture::TextureType::Texture2D: - case Tegra::Texture::TextureType::Texture2DNoMipmap: - return Shader::TextureType::Color2D; - case Tegra::Texture::TextureType::Texture3D: - return Shader::TextureType::Color3D; - case Tegra::Texture::TextureType::TextureCubemap: - return Shader::TextureType::ColorCube; - case Tegra::Texture::TextureType::Texture1DArray: - return Shader::TextureType::ColorArray1D; - case Tegra::Texture::TextureType::Texture2DArray: - return Shader::TextureType::ColorArray2D; - case Tegra::Texture::TextureType::Texture1DBuffer: - return Shader::TextureType::Buffer; - case Tegra::Texture::TextureType::TextureCubeArray: - return Shader::TextureType::ColorArrayCube; - default: - throw Shader::NotImplementedException("Unknown texture type"); - } - }()}; - texture_types.emplace(raw, result); - return result; - } - - Tegra::MemoryManager* gpu_memory{}; - GPUVAddr program_base{}; - - std::vector<u64> code; - std::unordered_map<u32, Shader::TextureType> texture_types; - std::unordered_map<u64, u32> cbuf_values; - - u32 local_memory_size{}; - u32 texture_bound{}; - u32 shared_memory_size{}; - std::array<u32, 3> workgroup_size{}; - - u32 read_lowest = std::numeric_limits<u32>::max(); - u32 read_highest = 0; - - u32 cached_lowest = std::numeric_limits<u32>::max(); - u32 cached_highest = 0; - - bool has_unbound_instructions = false; -}; - namespace { using Shader::Backend::SPIRV::EmitSPIRV; using Shader::Maxwell::TranslateProgram; +using VideoCommon::ComputeEnvironment; +using VideoCommon::FileEnvironment; +using VideoCommon::GenericEnvironment; +using VideoCommon::GraphicsEnvironment; -// TODO: Move this to a separate file -constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; -constexpr u32 CACHE_VERSION{2}; - -class GraphicsEnvironment final : public GenericEnvironment { -public: - explicit GraphicsEnvironment() = default; - explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program, - GPUVAddr program_base_, u32 start_address_) - : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { - gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); - switch (program) { - case Maxwell::ShaderProgram::VertexA: - stage = Shader::Stage::VertexA; - stage_index = 0; - break; - case Maxwell::ShaderProgram::VertexB: - stage = Shader::Stage::VertexB; - stage_index = 0; - break; - case Maxwell::ShaderProgram::TesselationControl: - stage = Shader::Stage::TessellationControl; - stage_index = 1; - break; - case Maxwell::ShaderProgram::TesselationEval: - stage = Shader::Stage::TessellationEval; - stage_index = 2; - break; - case Maxwell::ShaderProgram::Geometry: - stage = Shader::Stage::Geometry; - stage_index = 3; - break; - case Maxwell::ShaderProgram::Fragment: - stage = Shader::Stage::Fragment; - stage_index = 4; - break; - default: - UNREACHABLE_MSG("Invalid program={}", program); - break; - } - const u64 local_size{sph.LocalMemorySize()}; - ASSERT(local_size <= std::numeric_limits<u32>::max()); - local_memory_size = static_cast<u32>(local_size); - texture_bound = maxwell3d->regs.tex_cb_index; - } - - ~GraphicsEnvironment() override = default; - - u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { - const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; - ASSERT(cbuf.enabled); - u32 value{}; - if (cbuf_offset < cbuf.size) { - value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset); - } - cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); - return value; - } - - Shader::TextureType ReadTextureType(u32 handle) override { - const auto& regs{maxwell3d->regs}; - const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle); - } - -private: - Tegra::Engines::Maxwell3D* maxwell3d{}; - size_t stage_index{}; -}; - -class ComputeEnvironment final : public GenericEnvironment { -public: - explicit ComputeEnvironment() = default; - explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, - u32 start_address_) - : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ - &kepler_compute_} { - const auto& qmd{kepler_compute->launch_description}; - stage = Shader::Stage::Compute; - local_memory_size = qmd.local_pos_alloc; - texture_bound = kepler_compute->regs.tex_cb_index; - shared_memory_size = qmd.shared_alloc; - workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; - } - - ~ComputeEnvironment() override = default; - - u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { - const auto& qmd{kepler_compute->launch_description}; - ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); - const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; - u32 value{}; - if (cbuf_offset < cbuf.size) { - value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset); - } - cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); - return value; - } - - Shader::TextureType ReadTextureType(u32 handle) override { - const auto& regs{kepler_compute->regs}; - const auto& qmd{kepler_compute->launch_description}; - return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); - } - -private: - Tegra::Engines::KeplerCompute* kepler_compute{}; -}; - -void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, - std::ofstream& file) { - if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { - return; - } - const u32 num_envs{static_cast<u32>(envs.size())}; - file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs)); - for (const GenericEnvironment* const env : envs) { - env->Serialize(file); - } - file.write(key.data(), key.size_bytes()); -} - -template <typename Key, typename Envs> -void SerializePipeline(const Key& key, const Envs& envs, const std::string& filename) { - try { - std::ofstream file; - file.exceptions(std::ifstream::failbit); - Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::ate | std::ios::app); - if (!file.is_open()) { - LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", filename); - return; - } - if (file.tellp() == 0) { - file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size()) - .write(reinterpret_cast<const char*>(&CACHE_VERSION), sizeof(CACHE_VERSION)); - } - const std::span key_span(reinterpret_cast<const char*>(&key), sizeof(key)); - SerializePipeline(key_span, MakeSpan(envs), file); - - } catch (const std::ios_base::failure& e) { - LOG_ERROR(Common_Filesystem, "{}", e.what()); - if (!Common::FS::Delete(filename)) { - LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", filename); - } - } +template <typename Container> +auto MakeSpan(Container& container) { + return std::span(container.data(), container.size()); } -class FileEnvironment final : public Shader::Environment { -public: - void Deserialize(std::ifstream& file) { - u64 code_size{}; - u64 num_texture_types{}; - u64 num_cbuf_values{}; - file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size)) - .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types)) - .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values)) - .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size)) - .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound)) - .read(reinterpret_cast<char*>(&start_address), sizeof(start_address)) - .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest)) - .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest)) - .read(reinterpret_cast<char*>(&stage), sizeof(stage)); - code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64))); - file.read(reinterpret_cast<char*>(code.get()), code_size); - for (size_t i = 0; i < num_texture_types; ++i) { - u32 key; - Shader::TextureType type; - file.read(reinterpret_cast<char*>(&key), sizeof(key)) - .read(reinterpret_cast<char*>(&type), sizeof(type)); - texture_types.emplace(key, type); - } - for (size_t i = 0; i < num_cbuf_values; ++i) { - u64 key; - u32 value; - file.read(reinterpret_cast<char*>(&key), sizeof(key)) - .read(reinterpret_cast<char*>(&value), sizeof(value)); - cbuf_values.emplace(key, value); - } - if (stage == Shader::Stage::Compute) { - file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size)) - .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size)); - } else { - file.read(reinterpret_cast<char*>(&sph), sizeof(sph)); - } - } - - u64 ReadInstruction(u32 address) override { - if (address < read_lowest || address > read_highest) { - throw Shader::LogicError("Out of bounds address {}", address); - } - return code[(address - read_lowest) / sizeof(u64)]; - } - - u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override { - const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))}; - if (it == cbuf_values.end()) { - throw Shader::LogicError("Uncached read texture type"); - } - return it->second; - } - - Shader::TextureType ReadTextureType(u32 handle) override { - const auto it{texture_types.find(handle)}; - if (it == texture_types.end()) { - throw Shader::LogicError("Uncached read texture type"); - } - return it->second; - } - - u32 LocalMemorySize() const override { - return local_memory_size; - } - - u32 SharedMemorySize() const override { - return shared_memory_size; - } - - u32 TextureBoundBuffer() const override { - return texture_bound; - } - - std::array<u32, 3> WorkgroupSize() const override { - return workgroup_size; - } - -private: - std::unique_ptr<u64[]> code; - std::unordered_map<u32, Shader::TextureType> texture_types; - std::unordered_map<u64, u32> cbuf_values; - std::array<u32, 3> workgroup_size{}; - u32 local_memory_size{}; - u32 shared_memory_size{}; - u32 texture_bound{}; - u32 read_lowest{}; - u32 read_highest{}; -}; - Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) { switch (comparison) { case Maxwell::ComparisonOp::Never: @@ -518,113 +90,6 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso } } // Anonymous namespace -void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, - const VideoCore::DiskResourceLoadCallback& callback) { - if (title_id == 0) { - return; - } - std::string shader_dir{Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)}; - std::string base_dir{shader_dir + "/vulkan"}; - std::string transferable_dir{base_dir + "/transferable"}; - std::string precompiled_dir{base_dir + "/precompiled"}; - if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || - !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { - LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); - return; - } - pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id); - - struct { - std::mutex mutex; - size_t total{0}; - size_t built{0}; - bool has_loaded{false}; - } state; - - std::ifstream file; - Common::FS::OpenFStream(file, pipeline_cache_filename, std::ios::binary | std::ios::ate); - if (!file.is_open()) { - return; - } - file.exceptions(std::ifstream::failbit); - const auto end{file.tellg()}; - file.seekg(0, std::ios::beg); - - std::array<char, 8> magic_number; - u32 cache_version; - file.read(magic_number.data(), magic_number.size()) - .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version)); - if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) { - file.close(); - if (Common::FS::Delete(pipeline_cache_filename)) { - if (magic_number != MAGIC_NUMBER) { - LOG_ERROR(Render_Vulkan, "Invalid pipeline cache file"); - } - if (cache_version != CACHE_VERSION) { - LOG_INFO(Render_Vulkan, "Deleting old pipeline cache"); - } - } else { - LOG_ERROR(Render_Vulkan, - "Invalid pipeline cache file and failed to delete it in \"{}\"", - pipeline_cache_filename); - } - return; - } - while (file.tellg() != end) { - if (stop_loading) { - return; - } - u32 num_envs{}; - file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs)); - std::vector<FileEnvironment> envs(num_envs); - for (FileEnvironment& env : envs) { - env.Deserialize(file); - } - if (envs.front().ShaderStage() == Shader::Stage::Compute) { - ComputePipelineCacheKey key; - file.read(reinterpret_cast<char*>(&key), sizeof(key)); - - workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { - ShaderPools pools; - auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)}; - - std::lock_guard lock{state.mutex}; - compute_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); - } - }); - } else { - GraphicsPipelineCacheKey key; - file.read(reinterpret_cast<char*>(&key), sizeof(key)); - - workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { - ShaderPools pools; - boost::container::static_vector<Shader::Environment*, 5> env_ptrs; - for (auto& env : envs) { - env_ptrs.push_back(&env); - } - auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; - - std::lock_guard lock{state.mutex}; - graphics_cache.emplace(key, std::move(pipeline)); - ++state.built; - if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); - } - }); - } - ++state.total; - } - { - std::lock_guard lock{state.mutex}; - callback(VideoCore::LoadCallbackStage::Build, 0, state.total); - state.has_loaded = true; - } - workers.WaitForRequests(); -} - size_t ComputePipelineCacheKey::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this); return static_cast<size_t>(hash); @@ -643,17 +108,15 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c return std::memcmp(&rhs, this, Size()) == 0; } -PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, - Tegra::Engines::Maxwell3D& maxwell3d_, +PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, VKScheduler& scheduler_, DescriptorPool& descriptor_pool_, VKUpdateDescriptorQueue& update_descriptor_queue_, RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_, TextureCache& texture_cache_) - : VideoCommon::ShaderCache<ShaderInfo>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, - kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, - scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, + : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, + device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"), @@ -700,7 +163,7 @@ PipelineCache::~PipelineCache() = default; GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); - if (!RefreshStages()) { + if (!RefreshStages(graphics_key.unique_hashes)) { current_pipeline = nullptr; return nullptr; } @@ -728,21 +191,14 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() { ComputePipeline* PipelineCache::CurrentComputePipeline() { MICROPROFILE_SCOPE(Vulkan_PipelineCache); - const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; - const auto& qmd{kepler_compute.launch_description}; - const GPUVAddr shader_addr{program_base + qmd.program_start}; - const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; - if (!cpu_shader_addr) { - return nullptr; - } - const ShaderInfo* shader{TryGet(*cpu_shader_addr)}; + const ShaderInfo* const shader{ComputeShader()}; if (!shader) { - ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; - shader = MakeShaderInfo(env, *cpu_shader_addr); + return nullptr; } + const auto& qmd{kepler_compute.launch_description}; const ComputePipelineCacheKey key{ - .unique_hash{shader->unique_hash}, - .shared_memory_size{qmd.shared_alloc}, + .unique_hash = shader->unique_hash, + .shared_memory_size = qmd.shared_alloc, .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, }; const auto [pair, is_new]{compute_cache.try_emplace(key)}; @@ -754,58 +210,75 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { return pipeline.get(); } -bool PipelineCache::RefreshStages() { - auto& dirty{maxwell3d.dirty.flags}; - if (!dirty[VideoCommon::Dirty::Shaders]) { - return last_valid_shaders; +void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, + const VideoCore::DiskResourceLoadCallback& callback) { + if (title_id == 0) { + return; } - dirty[VideoCommon::Dirty::Shaders] = false; - - const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; - for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { - graphics_key.unique_hashes[index] = u128{}; - continue; - } - const auto& shader_config{maxwell3d.regs.shader_config[index]}; - const auto program{static_cast<Maxwell::ShaderProgram>(index)}; - const GPUVAddr shader_addr{base_addr + shader_config.offset}; - const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; - if (!cpu_shader_addr) { - LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr); - last_valid_shaders = false; - return false; - } - const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; - if (!shader_info) { - const u32 start_address{shader_config.offset}; - GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; - shader_info = MakeShaderInfo(env, *cpu_shader_addr); - } - shader_infos[index] = shader_info; - graphics_key.unique_hashes[index] = shader_info->unique_hash; + auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)}; + auto base_dir{shader_dir / "vulkan"}; + auto transferable_dir{base_dir / "transferable"}; + auto precompiled_dir{base_dir / "precompiled"}; + if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) || + !Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) { + LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories"); + return; } - last_valid_shaders = true; - return true; -} + pipeline_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id); -const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) { - auto info = std::make_unique<ShaderInfo>(); - if (const std::optional<u128> cached_hash{env.Analyze()}) { - info->unique_hash = *cached_hash; - info->size_bytes = env.CachedSize(); - } else { - // Slow path, not really hit on commercial games - // Build a control flow graph to get the real shader size - main_pools.flow_block.ReleaseContents(); - Shader::Maxwell::Flow::CFG cfg{env, main_pools.flow_block, env.StartAddress()}; - info->unique_hash = env.CalculateHash(); - info->size_bytes = env.ReadSize(); - } - const size_t size_bytes{info->size_bytes}; - const ShaderInfo* const result{info.get()}; - Register(std::move(info), cpu_addr, size_bytes); - return result; + struct { + std::mutex mutex; + size_t total{0}; + size_t built{0}; + bool has_loaded{false}; + } state; + + const auto load_compute{[&](std::ifstream& file, FileEnvironment env) { + ComputePipelineCacheKey key; + file.read(reinterpret_cast<char*>(&key), sizeof(key)); + + workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable { + ShaderPools pools; + auto pipeline{CreateComputePipeline(pools, key, env, false)}; + + std::lock_guard lock{state.mutex}; + compute_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) { + GraphicsPipelineCacheKey key; + file.read(reinterpret_cast<char*>(&key), sizeof(key)); + + workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { + ShaderPools pools; + boost::container::static_vector<Shader::Environment*, 5> env_ptrs; + for (auto& env : envs) { + env_ptrs.push_back(&env); + } + auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; + + std::lock_guard lock{state.mutex}; + graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; + if (state.has_loaded) { + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); + } + }); + ++state.total; + }}; + VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, load_compute, load_graphics); + + std::unique_lock lock{state.mutex}; + callback(VideoCore::LoadCallbackStage::Build, 0, state.total); + state.has_loaded = true; + lock.unlock(); + + workers.WaitForRequests(); } std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( @@ -815,7 +288,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( size_t env_index{0}; std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (key.unique_hashes[index] == u128{}) { + if (key.unique_hashes[index] == 0) { continue; } Shader::Environment& env{*envs[env_index]}; @@ -830,7 +303,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( u32 binding{0}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (key.unique_hashes[index] == u128{}) { + if (key.unique_hashes[index] == 0) { continue; } UNIMPLEMENTED_IF(index == 0); @@ -844,8 +317,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( device.SaveShader(code); modules[stage_index] = BuildShader(device, code); if (device.HasDebuggingToolAttached()) { - const std::string name{fmt::format("{:016x}{:016x}", key.unique_hashes[index][0], - key.unique_hashes[index][1])}; + const std::string name{fmt::format("{:016x}", key.unique_hashes[index])}; modules[stage_index].SetObjectNameEXT(name.c_str()); } } @@ -863,7 +335,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() { const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (graphics_key.unique_hashes[index] == u128{}) { + if (graphics_key.unique_hashes[index] == 0) { continue; } const auto program{static_cast<Maxwell::ShaderProgram>(index)}; @@ -871,7 +343,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() { const u32 start_address{maxwell3d.regs.shader_config[index].offset}; env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; env.SetCachedSize(shader_infos[index]->size_bytes); - envs.push_back(&env); } auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; @@ -882,11 +353,11 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() { boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram> env_ptrs; for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { - if (key.unique_hashes[index] != u128{}) { + if (key.unique_hashes[index] != 0) { env_ptrs.push_back(&envs[index]); } } - SerializePipeline(key, env_ptrs, pipeline_cache_filename); + VideoCommon::SerializePipeline(key, env_ptrs, pipeline_cache_filename); }); return pipeline; } @@ -902,8 +373,8 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; if (!pipeline_cache_filename.empty()) { serialization_thread.QueueWork([this, key, env = std::move(env)] { - SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env}, - pipeline_cache_filename); + VideoCommon::SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env}, + pipeline_cache_filename); }); } return pipeline; @@ -921,7 +392,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline( device.SaveShader(code); vk::ShaderModule spv_module{BuildShader(device, code)}; if (device.HasDebuggingToolAttached()) { - const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])}; + const auto name{fmt::format("{:016x}", key.unique_hash)}; spv_module.SetObjectNameEXT(name.c_str()); } Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; @@ -1035,7 +506,7 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Profile profile{base_profile}; const Shader::Stage stage{program.stage}; - const bool has_geometry{key.unique_hashes[4] != u128{}}; + const bool has_geometry{key.unique_hashes[4] != 0}; const bool gl_ndc{key.state.ndc_minus_one_to_one != 0}; const float point_size{Common::BitCast<float>(key.state.point_size)}; switch (stage) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index eec17d3fd..4e48b4956 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -6,6 +6,7 @@ #include <array> #include <cstddef> +#include <filesystem> #include <iosfwd> #include <memory> #include <type_traits> @@ -42,7 +43,7 @@ namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; struct ComputePipelineCacheKey { - u128 unique_hash; + u64 unique_hash; u32 shared_memory_size; std::array<u32, 3> workgroup_size; @@ -76,16 +77,12 @@ namespace Vulkan { class ComputePipeline; class Device; class DescriptorPool; -class GenericEnvironment; class RasterizerVulkan; class RenderPassCache; class VKScheduler; class VKUpdateDescriptorQueue; -struct ShaderInfo { - u128 unique_hash{}; - size_t size_bytes{}; -}; +using VideoCommon::ShaderInfo; struct ShaderPools { void ReleaseContents() { @@ -99,17 +96,16 @@ struct ShaderPools { Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; }; -class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> { +class PipelineCache : public VideoCommon::ShaderCache { public: - explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu, - Tegra::Engines::Maxwell3D& maxwell3d, + explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d, Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, const Device& device, VKScheduler& scheduler, DescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, RenderPassCache& render_pass_cache, BufferCache& buffer_cache, TextureCache& texture_cache); - ~PipelineCache() override; + ~PipelineCache(); [[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline(); @@ -119,10 +115,6 @@ public: const VideoCore::DiskResourceLoadCallback& callback); private: - bool RefreshStages(); - - const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); - std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(); std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline( @@ -140,11 +132,6 @@ private: Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, const Shader::IR::Program& program); - Tegra::GPU& gpu; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::Engines::KeplerCompute& kepler_compute; - Tegra::MemoryManager& gpu_memory; - const Device& device; VKScheduler& scheduler; DescriptorPool& descriptor_pool; @@ -156,16 +143,13 @@ private: GraphicsPipelineCacheKey graphics_key{}; GraphicsPipeline* current_pipeline{}; - std::array<const ShaderInfo*, 6> shader_infos{}; - bool last_valid_shaders{}; - std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache; std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache; ShaderPools main_pools; Shader::Profile base_profile; - std::string pipeline_cache_filename; + std::filesystem::path pipeline_cache_filename; Common::ThreadWorker workers; Common::ThreadWorker serialization_thread; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 7df169c85..fa6daeb3a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -149,7 +149,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool, update_descriptor_queue, descriptor_pool), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), - pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, + pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler, descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache, texture_cache), query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache }, diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp new file mode 100644 index 000000000..b8b8eace5 --- /dev/null +++ b/src/video_core/shader_cache.cpp @@ -0,0 +1,233 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <array> +#include <vector> + +#include "common/assert.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" +#include "shader_recompiler/object_pool.h" +#include "video_core/dirty_flags.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/shader_cache.h" +#include "video_core/shader_environment.h" + +namespace VideoCommon { + +void ShaderCache::InvalidateRegion(VAddr addr, size_t size) { + std::scoped_lock lock{invalidation_mutex}; + InvalidatePagesInRegion(addr, size); + RemovePendingShaders(); +} + +void ShaderCache::OnCPUWrite(VAddr addr, size_t size) { + std::lock_guard lock{invalidation_mutex}; + InvalidatePagesInRegion(addr, size); +} + +void ShaderCache::SyncGuestHost() { + std::scoped_lock lock{invalidation_mutex}; + RemovePendingShaders(); +} + +ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_) + : gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, + rasterizer{rasterizer_} {} + +bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) { + auto& dirty{maxwell3d.dirty.flags}; + if (!dirty[VideoCommon::Dirty::Shaders]) { + return last_shaders_valid; + } + dirty[VideoCommon::Dirty::Shaders] = false; + + const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; + for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) { + if (!maxwell3d.regs.IsShaderConfigEnabled(index)) { + unique_hashes[index] = 0; + continue; + } + const auto& shader_config{maxwell3d.regs.shader_config[index]}; + const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)}; + const GPUVAddr shader_addr{base_addr + shader_config.offset}; + const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); + last_shaders_valid = false; + return false; + } + const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)}; + if (!shader_info) { + const u32 start_address{shader_config.offset}; + GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address}; + shader_info = MakeShaderInfo(env, *cpu_shader_addr); + } + shader_infos[index] = shader_info; + unique_hashes[index] = shader_info->unique_hash; + } + last_shaders_valid = true; + return true; +} + +const ShaderInfo* ShaderCache::ComputeShader() { + const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; + const auto& qmd{kepler_compute.launch_description}; + const GPUVAddr shader_addr{program_base + qmd.program_start}; + const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)}; + if (!cpu_shader_addr) { + LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr); + return nullptr; + } + if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) { + return shader; + } + ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; + return MakeShaderInfo(env, *cpu_shader_addr); +} + +ShaderInfo* ShaderCache::TryGet(VAddr addr) const { + std::scoped_lock lock{lookup_mutex}; + + const auto it = lookup_cache.find(addr); + if (it == lookup_cache.end()) { + return nullptr; + } + return it->second->data; +} + +void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size) { + std::scoped_lock lock{invalidation_mutex, lookup_mutex}; + + const VAddr addr_end = addr + size; + Entry* const entry = NewEntry(addr, addr_end, data.get()); + + const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { + invalidation_cache[page].push_back(entry); + } + + storage.push_back(std::move(data)); + + rasterizer.UpdatePagesCachedCount(addr, size, 1); +} + +void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) { + const VAddr addr_end = addr + size; + const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { + auto it = invalidation_cache.find(page); + if (it == invalidation_cache.end()) { + continue; + } + InvalidatePageEntries(it->second, addr, addr_end); + } +} + +void ShaderCache::RemovePendingShaders() { + if (marked_for_removal.empty()) { + return; + } + // Remove duplicates + std::ranges::sort(marked_for_removal); + marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), + marked_for_removal.end()); + + std::vector<ShaderInfo*> removed_shaders; + removed_shaders.reserve(marked_for_removal.size()); + + std::scoped_lock lock{lookup_mutex}; + + for (Entry* const entry : marked_for_removal) { + removed_shaders.push_back(entry->data); + + const auto it = lookup_cache.find(entry->addr_start); + ASSERT(it != lookup_cache.end()); + lookup_cache.erase(it); + } + marked_for_removal.clear(); + + if (!removed_shaders.empty()) { + RemoveShadersFromStorage(std::move(removed_shaders)); + } +} + +void ShaderCache::InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) { + size_t index = 0; + while (index < entries.size()) { + Entry* const entry = entries[index]; + if (!entry->Overlaps(addr, addr_end)) { + ++index; + continue; + } + + UnmarkMemory(entry); + RemoveEntryFromInvalidationCache(entry); + marked_for_removal.push_back(entry); + } +} + +void ShaderCache::RemoveEntryFromInvalidationCache(const Entry* entry) { + const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS; + for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) { + const auto entries_it = invalidation_cache.find(page); + ASSERT(entries_it != invalidation_cache.end()); + std::vector<Entry*>& entries = entries_it->second; + + const auto entry_it = std::ranges::find(entries, entry); + ASSERT(entry_it != entries.end()); + entries.erase(entry_it); + } +} + +void ShaderCache::UnmarkMemory(Entry* entry) { + if (!entry->is_memory_marked) { + return; + } + entry->is_memory_marked = false; + + const VAddr addr = entry->addr_start; + const size_t size = entry->addr_end - addr; + rasterizer.UpdatePagesCachedCount(addr, size, -1); +} + +void ShaderCache::RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders) { + // Remove them from the cache + std::erase_if(storage, [&removed_shaders](const std::unique_ptr<ShaderInfo>& shader) { + return std::ranges::find(removed_shaders, shader.get()) != removed_shaders.end(); + }); +} + +ShaderCache::Entry* ShaderCache::NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data) { + auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data}); + Entry* const entry_pointer = entry.get(); + + lookup_cache.emplace(addr, std::move(entry)); + return entry_pointer; +} + +const ShaderInfo* ShaderCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) { + auto info = std::make_unique<ShaderInfo>(); + if (const std::optional<u64> cached_hash{env.Analyze()}) { + info->unique_hash = *cached_hash; + info->size_bytes = env.CachedSize(); + } else { + // Slow path, not really hit on commercial games + // Build a control flow graph to get the real shader size + Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; + Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()}; + info->unique_hash = env.CalculateHash(); + info->size_bytes = env.ReadSize(); + } + const size_t size_bytes{info->size_bytes}; + const ShaderInfo* const result{info.get()}; + Register(std::move(info), cpu_addr, size_bytes); + return result; +} + +} // namespace VideoCommon diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h index 015a789d6..89a4bcc84 100644 --- a/src/video_core/shader_cache.h +++ b/src/video_core/shader_cache.h @@ -4,20 +4,28 @@ #pragma once -#include <algorithm> #include <memory> #include <mutex> #include <unordered_map> #include <utility> #include <vector> -#include "common/assert.h" #include "common/common_types.h" #include "video_core/rasterizer_interface.h" +namespace Tegra { +class MemoryManager; +} + namespace VideoCommon { -template <class T> +class GenericEnvironment; + +struct ShaderInfo { + u64 unique_hash{}; + size_t size_bytes{}; +}; + class ShaderCache { static constexpr u64 PAGE_BITS = 14; static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; @@ -25,206 +33,100 @@ class ShaderCache { struct Entry { VAddr addr_start; VAddr addr_end; - T* data; + ShaderInfo* data; bool is_memory_marked = true; - constexpr bool Overlaps(VAddr start, VAddr end) const noexcept { + bool Overlaps(VAddr start, VAddr end) const noexcept { return start < addr_end && addr_start < end; } }; public: - virtual ~ShaderCache() = default; - /// @brief Removes shaders inside a given region /// @note Checks for ranges /// @param addr Start address of the invalidation /// @param size Number of bytes of the invalidation - void InvalidateRegion(VAddr addr, std::size_t size) { - std::scoped_lock lock{invalidation_mutex}; - InvalidatePagesInRegion(addr, size); - RemovePendingShaders(); - } + void InvalidateRegion(VAddr addr, size_t size); /// @brief Unmarks a memory region as cached and marks it for removal /// @param addr Start address of the CPU write operation /// @param size Number of bytes of the CPU write operation - void OnCPUWrite(VAddr addr, std::size_t size) { - std::lock_guard lock{invalidation_mutex}; - InvalidatePagesInRegion(addr, size); - } + void OnCPUWrite(VAddr addr, size_t size); /// @brief Flushes delayed removal operations - void SyncGuestHost() { - std::scoped_lock lock{invalidation_mutex}; - RemovePendingShaders(); - } + void SyncGuestHost(); + +protected: + explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_, + Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_); + + /// @brief Update the hashes and information of shader stages + /// @param unique_hashes Shader hashes to store into when a stage is enabled + /// @return True no success, false on error + bool RefreshStages(std::array<u64, 6>& unique_hashes); + + /// @brief Returns information about the current compute shader + /// @return Pointer to a valid shader, nullptr on error + const ShaderInfo* ComputeShader(); + + Tegra::MemoryManager& gpu_memory; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; + std::array<const ShaderInfo*, 6> shader_infos{}; + bool last_shaders_valid = false; + +private: /// @brief Tries to obtain a cached shader starting in a given address /// @note Doesn't check for ranges, the given address has to be the start of the shader /// @param addr Start address of the shader, this doesn't cache for region /// @return Pointer to a valid shader, nullptr when nothing is found - T* TryGet(VAddr addr) const { - std::scoped_lock lock{lookup_mutex}; - - const auto it = lookup_cache.find(addr); - if (it == lookup_cache.end()) { - return nullptr; - } - return it->second->data; - } - -protected: - explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {} + ShaderInfo* TryGet(VAddr addr) const; /// @brief Register in the cache a given entry /// @param data Shader to store in the cache /// @param addr Start address of the shader that will be registered /// @param size Size in bytes of the shader - void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) { - std::scoped_lock lock{invalidation_mutex, lookup_mutex}; - - const VAddr addr_end = addr + size; - Entry* const entry = NewEntry(addr, addr_end, data.get()); - - const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; - for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { - invalidation_cache[page].push_back(entry); - } - - storage.push_back(std::move(data)); + void Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size); - rasterizer.UpdatePagesCachedCount(addr, size, 1); - } - - /// @brief Called when a shader is going to be removed - /// @param shader Shader that will be removed - /// @pre invalidation_cache is locked - /// @pre lookup_mutex is locked - virtual void OnShaderRemoval([[maybe_unused]] T* shader) {} - -private: /// @brief Invalidate pages in a given region /// @pre invalidation_mutex is locked - void InvalidatePagesInRegion(VAddr addr, std::size_t size) { - const VAddr addr_end = addr + size; - const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS; - for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) { - auto it = invalidation_cache.find(page); - if (it == invalidation_cache.end()) { - continue; - } - InvalidatePageEntries(it->second, addr, addr_end); - } - } + void InvalidatePagesInRegion(VAddr addr, size_t size); /// @brief Remove shaders marked for deletion /// @pre invalidation_mutex is locked - void RemovePendingShaders() { - if (marked_for_removal.empty()) { - return; - } - // Remove duplicates - std::sort(marked_for_removal.begin(), marked_for_removal.end()); - marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()), - marked_for_removal.end()); - - std::vector<T*> removed_shaders; - removed_shaders.reserve(marked_for_removal.size()); - - std::scoped_lock lock{lookup_mutex}; - - for (Entry* const entry : marked_for_removal) { - removed_shaders.push_back(entry->data); - - const auto it = lookup_cache.find(entry->addr_start); - ASSERT(it != lookup_cache.end()); - lookup_cache.erase(it); - } - marked_for_removal.clear(); - - if (!removed_shaders.empty()) { - RemoveShadersFromStorage(std::move(removed_shaders)); - } - } + void RemovePendingShaders(); /// @brief Invalidates entries in a given range for the passed page /// @param entries Vector of entries in the page, it will be modified on overlaps /// @param addr Start address of the invalidation /// @param addr_end Non-inclusive end address of the invalidation /// @pre invalidation_mutex is locked - void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) { - std::size_t index = 0; - while (index < entries.size()) { - Entry* const entry = entries[index]; - if (!entry->Overlaps(addr, addr_end)) { - ++index; - continue; - } - - UnmarkMemory(entry); - RemoveEntryFromInvalidationCache(entry); - marked_for_removal.push_back(entry); - } - } + void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end); /// @brief Removes all references to an entry in the invalidation cache /// @param entry Entry to remove from the invalidation cache /// @pre invalidation_mutex is locked - void RemoveEntryFromInvalidationCache(const Entry* entry) { - const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS; - for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) { - const auto entries_it = invalidation_cache.find(page); - ASSERT(entries_it != invalidation_cache.end()); - std::vector<Entry*>& entries = entries_it->second; - - const auto entry_it = std::find(entries.begin(), entries.end(), entry); - ASSERT(entry_it != entries.end()); - entries.erase(entry_it); - } - } + void RemoveEntryFromInvalidationCache(const Entry* entry); /// @brief Unmarks an entry from the rasterizer cache /// @param entry Entry to unmark from memory - void UnmarkMemory(Entry* entry) { - if (!entry->is_memory_marked) { - return; - } - entry->is_memory_marked = false; - - const VAddr addr = entry->addr_start; - const std::size_t size = entry->addr_end - addr; - rasterizer.UpdatePagesCachedCount(addr, size, -1); - } + void UnmarkMemory(Entry* entry); /// @brief Removes a vector of shaders from a list /// @param removed_shaders Shaders to be removed from the storage /// @pre invalidation_mutex is locked /// @pre lookup_mutex is locked - void RemoveShadersFromStorage(std::vector<T*> removed_shaders) { - // Notify removals - for (T* const shader : removed_shaders) { - OnShaderRemoval(shader); - } - - // Remove them from the cache - const auto is_removed = [&removed_shaders](const std::unique_ptr<T>& shader) { - return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) != - removed_shaders.end(); - }; - std::erase_if(storage, is_removed); - } + void RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders); /// @brief Creates a new entry in the lookup cache and returns its pointer /// @pre lookup_mutex is locked - Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) { - auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data}); - Entry* const entry_pointer = entry.get(); + Entry* NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data); - lookup_cache.emplace(addr, std::move(entry)); - return entry_pointer; - } + /// @brief Create a new shader entry and register it + const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); VideoCore::RasterizerInterface& rasterizer; @@ -233,7 +135,7 @@ private: std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache; std::unordered_map<u64, std::vector<Entry*>> invalidation_cache; - std::vector<std::unique_ptr<T>> storage; + std::vector<std::unique_ptr<ShaderInfo>> storage; std::vector<Entry*> marked_for_removal; }; diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp new file mode 100644 index 000000000..5dccc0097 --- /dev/null +++ b/src/video_core/shader_environment.cpp @@ -0,0 +1,453 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <filesystem> +#include <fstream> +#include <memory> +#include <optional> +#include <utility> + +#include "common/assert.h" +#include "common/cityhash.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "common/fs/fs.h" +#include "common/logging/log.h" +#include "shader_recompiler/environment.h" +#include "video_core/memory_manager.h" +#include "video_core/shader_environment.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'}; +constexpr u32 CACHE_VERSION = 3; + +constexpr size_t INST_SIZE = sizeof(u64); + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +static u64 MakeCbufKey(u32 index, u32 offset) { + return (static_cast<u64>(index) << 32) | offset; +} + +static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) { + switch (entry.texture_type) { + case Tegra::Texture::TextureType::Texture1D: + return Shader::TextureType::Color1D; + case Tegra::Texture::TextureType::Texture2D: + case Tegra::Texture::TextureType::Texture2DNoMipmap: + return Shader::TextureType::Color2D; + case Tegra::Texture::TextureType::Texture3D: + return Shader::TextureType::Color3D; + case Tegra::Texture::TextureType::TextureCubemap: + return Shader::TextureType::ColorCube; + case Tegra::Texture::TextureType::Texture1DArray: + return Shader::TextureType::ColorArray1D; + case Tegra::Texture::TextureType::Texture2DArray: + return Shader::TextureType::ColorArray2D; + case Tegra::Texture::TextureType::Texture1DBuffer: + return Shader::TextureType::Buffer; + case Tegra::Texture::TextureType::TextureCubeArray: + return Shader::TextureType::ColorArrayCube; + default: + throw Shader::NotImplementedException("Unknown texture type"); + } +} + +GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : gpu_memory{&gpu_memory_}, program_base{program_base_} { + start_address = start_address_; +} + +GenericEnvironment::~GenericEnvironment() = default; + +u32 GenericEnvironment::TextureBoundBuffer() const { + return texture_bound; +} + +u32 GenericEnvironment::LocalMemorySize() const { + return local_memory_size; +} + +u32 GenericEnvironment::SharedMemorySize() const { + return shared_memory_size; +} + +std::array<u32, 3> GenericEnvironment::WorkgroupSize() const { + return workgroup_size; +} + +u64 GenericEnvironment::ReadInstruction(u32 address) { + read_lowest = std::min(read_lowest, address); + read_highest = std::max(read_highest, address); + + if (address >= cached_lowest && address < cached_highest) { + return code[(address - cached_lowest) / INST_SIZE]; + } + has_unbound_instructions = true; + return gpu_memory->Read<u64>(program_base + address); +} + +std::optional<u64> GenericEnvironment::Analyze() { + const std::optional<u64> size{TryFindSize()}; + if (!size) { + return std::nullopt; + } + cached_lowest = start_address; + cached_highest = start_address + static_cast<u32>(*size); + return Common::CityHash64(reinterpret_cast<const char*>(code.data()), *size); +} + +void GenericEnvironment::SetCachedSize(size_t size_bytes) { + cached_lowest = start_address; + cached_highest = start_address + static_cast<u32>(size_bytes); + code.resize(CachedSize()); + gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64)); +} + +size_t GenericEnvironment::CachedSize() const noexcept { + return cached_highest - cached_lowest + INST_SIZE; +} + +size_t GenericEnvironment::ReadSize() const noexcept { + return read_highest - read_lowest + INST_SIZE; +} + +bool GenericEnvironment::CanBeSerialized() const noexcept { + return !has_unbound_instructions; +} + +u64 GenericEnvironment::CalculateHash() const { + const size_t size{ReadSize()}; + const auto data{std::make_unique<char[]>(size)}; + gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size); + return Common::CityHash64(data.get(), size); +} + +void GenericEnvironment::Serialize(std::ofstream& file) const { + const u64 code_size{static_cast<u64>(CachedSize())}; + const u64 num_texture_types{static_cast<u64>(texture_types.size())}; + const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())}; + + file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size)) + .write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types)) + .write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values)) + .write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size)) + .write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound)) + .write(reinterpret_cast<const char*>(&start_address), sizeof(start_address)) + .write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest)) + .write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest)) + .write(reinterpret_cast<const char*>(&stage), sizeof(stage)) + .write(reinterpret_cast<const char*>(code.data()), code_size); + for (const auto [key, type] : texture_types) { + file.write(reinterpret_cast<const char*>(&key), sizeof(key)) + .write(reinterpret_cast<const char*>(&type), sizeof(type)); + } + for (const auto [key, type] : cbuf_values) { + file.write(reinterpret_cast<const char*>(&key), sizeof(key)) + .write(reinterpret_cast<const char*>(&type), sizeof(type)); + } + if (stage == Shader::Stage::Compute) { + file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size)) + .write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size)); + } else { + file.write(reinterpret_cast<const char*>(&sph), sizeof(sph)); + } +} + +std::optional<u64> GenericEnvironment::TryFindSize() { + static constexpr size_t BLOCK_SIZE = 0x1000; + static constexpr size_t MAXIMUM_SIZE = 0x100000; + + static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL; + static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL; + + GPUVAddr guest_addr{program_base + start_address}; + size_t offset{0}; + size_t size{BLOCK_SIZE}; + while (size <= MAXIMUM_SIZE) { + code.resize(size / INST_SIZE); + u64* const data = code.data() + offset / INST_SIZE; + gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE); + for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) { + const u64 inst = data[index / INST_SIZE]; + if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) { + return offset + index; + } + } + guest_addr += BLOCK_SIZE; + size += BLOCK_SIZE; + offset += BLOCK_SIZE; + } + return std::nullopt; +} + +Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, + bool via_header_index, u32 raw) { + const TextureHandle handle{raw, via_header_index}; + const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; + Tegra::Texture::TICEntry entry; + gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); + const Shader::TextureType result{ConvertType(entry)}; + texture_types.emplace(raw, result); + return result; +} + +GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, + Maxwell::ShaderProgram program, GPUVAddr program_base_, + u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} { + gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph)); + switch (program) { + case Maxwell::ShaderProgram::VertexA: + stage = Shader::Stage::VertexA; + stage_index = 0; + break; + case Maxwell::ShaderProgram::VertexB: + stage = Shader::Stage::VertexB; + stage_index = 0; + break; + case Maxwell::ShaderProgram::TesselationControl: + stage = Shader::Stage::TessellationControl; + stage_index = 1; + break; + case Maxwell::ShaderProgram::TesselationEval: + stage = Shader::Stage::TessellationEval; + stage_index = 2; + break; + case Maxwell::ShaderProgram::Geometry: + stage = Shader::Stage::Geometry; + stage_index = 3; + break; + case Maxwell::ShaderProgram::Fragment: + stage = Shader::Stage::Fragment; + stage_index = 4; + break; + default: + UNREACHABLE_MSG("Invalid program={}", program); + break; + } + const u64 local_size{sph.LocalMemorySize()}; + ASSERT(local_size <= std::numeric_limits<u32>::max()); + local_memory_size = static_cast<u32>(local_size); + texture_bound = maxwell3d->regs.tex_cb_index; +} + +u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { + const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]}; + ASSERT(cbuf.enabled); + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; +} + +Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) { + const auto& regs{maxwell3d->regs}; + const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle); +} + +ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_) + : GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{ + &kepler_compute_} { + const auto& qmd{kepler_compute->launch_description}; + stage = Shader::Stage::Compute; + local_memory_size = qmd.local_pos_alloc; + texture_bound = kepler_compute->regs.tex_cb_index; + shared_memory_size = qmd.shared_alloc; + workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}; +} + +u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { + const auto& qmd{kepler_compute->launch_description}; + ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0); + const auto& cbuf{qmd.const_buffer_config[cbuf_index]}; + u32 value{}; + if (cbuf_offset < cbuf.size) { + value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset); + } + cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value); + return value; +} + +Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) { + const auto& regs{kepler_compute->regs}; + const auto& qmd{kepler_compute->launch_description}; + return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle); +} + +void FileEnvironment::Deserialize(std::ifstream& file) { + u64 code_size{}; + u64 num_texture_types{}; + u64 num_cbuf_values{}; + file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size)) + .read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types)) + .read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values)) + .read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size)) + .read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound)) + .read(reinterpret_cast<char*>(&start_address), sizeof(start_address)) + .read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest)) + .read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest)) + .read(reinterpret_cast<char*>(&stage), sizeof(stage)); + code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64))); + file.read(reinterpret_cast<char*>(code.get()), code_size); + for (size_t i = 0; i < num_texture_types; ++i) { + u32 key; + Shader::TextureType type; + file.read(reinterpret_cast<char*>(&key), sizeof(key)) + .read(reinterpret_cast<char*>(&type), sizeof(type)); + texture_types.emplace(key, type); + } + for (size_t i = 0; i < num_cbuf_values; ++i) { + u64 key; + u32 value; + file.read(reinterpret_cast<char*>(&key), sizeof(key)) + .read(reinterpret_cast<char*>(&value), sizeof(value)); + cbuf_values.emplace(key, value); + } + if (stage == Shader::Stage::Compute) { + file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size)) + .read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size)); + } else { + file.read(reinterpret_cast<char*>(&sph), sizeof(sph)); + } +} + +u64 FileEnvironment::ReadInstruction(u32 address) { + if (address < read_lowest || address > read_highest) { + throw Shader::LogicError("Out of bounds address {}", address); + } + return code[(address - read_lowest) / sizeof(u64)]; +} + +u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) { + const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))}; + if (it == cbuf_values.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + return it->second; +} + +Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) { + const auto it{texture_types.find(handle)}; + if (it == texture_types.end()) { + throw Shader::LogicError("Uncached read texture type"); + } + return it->second; +} + +u32 FileEnvironment::LocalMemorySize() const { + return local_memory_size; +} + +u32 FileEnvironment::SharedMemorySize() const { + return shared_memory_size; +} + +u32 FileEnvironment::TextureBoundBuffer() const { + return texture_bound; +} + +std::array<u32, 3> FileEnvironment::WorkgroupSize() const { + return workgroup_size; +} + +void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, + const std::filesystem::path& filename) try { + std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app); + file.exceptions(std::ifstream::failbit); + if (!file.is_open()) { + LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + return; + } + if (file.tellp() == 0) { + // Write header + file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size()) + .write(reinterpret_cast<const char*>(&CACHE_VERSION), sizeof(CACHE_VERSION)); + } + if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) { + return; + } + const u32 num_envs{static_cast<u32>(envs.size())}; + file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs)); + for (const GenericEnvironment* const env : envs) { + env->Serialize(file); + } + file.write(key.data(), key.size_bytes()); + +} catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::RemoveFile(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + } +} + +void LoadPipelines( + std::stop_token stop_loading, const std::filesystem::path& filename, + Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute, + Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics) try { + std::ifstream file(filename, std::ios::binary | std::ios::ate); + if (!file.is_open()) { + return; + } + file.exceptions(std::ifstream::failbit); + const auto end{file.tellg()}; + file.seekg(0, std::ios::beg); + + std::array<char, 8> magic_number; + u32 cache_version; + file.read(magic_number.data(), magic_number.size()) + .read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version)); + if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) { + file.close(); + if (Common::FS::RemoveFile(filename)) { + if (magic_number != MAGIC_NUMBER) { + LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file"); + } + if (cache_version != CACHE_VERSION) { + LOG_INFO(Common_Filesystem, "Deleting old pipeline cache"); + } + } else { + LOG_ERROR(Common_Filesystem, + "Invalid pipeline cache file and failed to delete it in \"{}\"", + Common::FS::PathToUTF8String(filename)); + } + return; + } + while (file.tellg() != end) { + if (stop_loading.stop_requested()) { + return; + } + u32 num_envs{}; + file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs)); + std::vector<FileEnvironment> envs(num_envs); + for (FileEnvironment& env : envs) { + env.Deserialize(file); + } + if (envs.front().ShaderStage() == Shader::Stage::Compute) { + load_compute(file, std::move(envs.front())); + } else { + load_graphics(file, std::move(envs)); + } + } + +} catch (const std::ios_base::failure& e) { + LOG_ERROR(Common_Filesystem, "{}", e.what()); + if (!Common::FS::RemoveFile(filename)) { + LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", + Common::FS::PathToUTF8String(filename)); + } +} + +} // namespace VideoCommon diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h new file mode 100644 index 000000000..37d712045 --- /dev/null +++ b/src/video_core/shader_environment.h @@ -0,0 +1,198 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <array> +#include <atomic> +#include <filesystem> +#include <iosfwd> +#include <limits> +#include <memory> +#include <optional> +#include <span> +#include <type_traits> +#include <unordered_map> +#include <vector> + +#include "common/common_types.h" +#include "common/unique_function.h" +#include "shader_recompiler/environment.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/textures/texture.h" + +namespace Tegra { +class Memorymanager; +} + +namespace VideoCommon { + +struct TextureHandle { + explicit TextureHandle(u32 data, bool via_header_index) { + if (via_header_index) { + image = data; + sampler = data; + } else { + const Tegra::Texture::TextureHandle handle{data}; + image = handle.tic_id; + sampler = via_header_index ? image : handle.tsc_id.Value(); + } + } + + u32 image; + u32 sampler; +}; + +class GenericEnvironment : public Shader::Environment { +public: + explicit GenericEnvironment() = default; + explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_); + + ~GenericEnvironment() override; + + [[nodiscard]] u32 TextureBoundBuffer() const final; + + [[nodiscard]] u32 LocalMemorySize() const final; + + [[nodiscard]] u32 SharedMemorySize() const final; + + [[nodiscard]] std::array<u32, 3> WorkgroupSize() const final; + + [[nodiscard]] u64 ReadInstruction(u32 address) final; + + [[nodiscard]] std::optional<u64> Analyze(); + + void SetCachedSize(size_t size_bytes); + + [[nodiscard]] size_t CachedSize() const noexcept; + + [[nodiscard]] size_t ReadSize() const noexcept; + + [[nodiscard]] bool CanBeSerialized() const noexcept; + + [[nodiscard]] u64 CalculateHash() const; + + void Serialize(std::ofstream& file) const; + +protected: + std::optional<u64> TryFindSize(); + + Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index, + u32 raw); + + Tegra::MemoryManager* gpu_memory{}; + GPUVAddr program_base{}; + + std::vector<u64> code; + std::unordered_map<u32, Shader::TextureType> texture_types; + std::unordered_map<u64, u32> cbuf_values; + + u32 local_memory_size{}; + u32 texture_bound{}; + u32 shared_memory_size{}; + std::array<u32, 3> workgroup_size{}; + + u32 read_lowest = std::numeric_limits<u32>::max(); + u32 read_highest = 0; + + u32 cached_lowest = std::numeric_limits<u32>::max(); + u32 cached_highest = 0; + + bool has_unbound_instructions = false; +}; + +class GraphicsEnvironment final : public GenericEnvironment { +public: + explicit GraphicsEnvironment() = default; + explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::MemoryManager& gpu_memory_, + Tegra::Engines::Maxwell3D::Regs::ShaderProgram program, + GPUVAddr program_base_, u32 start_address_); + + ~GraphicsEnvironment() override = default; + + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; + + Shader::TextureType ReadTextureType(u32 handle) override; + +private: + Tegra::Engines::Maxwell3D* maxwell3d{}; + size_t stage_index{}; +}; + +class ComputeEnvironment final : public GenericEnvironment { +public: + explicit ComputeEnvironment() = default; + explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_, + u32 start_address_); + + ~ComputeEnvironment() override = default; + + u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; + + Shader::TextureType ReadTextureType(u32 handle) override; + +private: + Tegra::Engines::KeplerCompute* kepler_compute{}; +}; + +class FileEnvironment final : public Shader::Environment { +public: + FileEnvironment() = default; + ~FileEnvironment() override = default; + + FileEnvironment& operator=(FileEnvironment&&) noexcept = default; + FileEnvironment(FileEnvironment&&) noexcept = default; + + FileEnvironment& operator=(const FileEnvironment&) = delete; + FileEnvironment(const FileEnvironment&) = delete; + + void Deserialize(std::ifstream& file); + + [[nodiscard]] u64 ReadInstruction(u32 address) override; + + [[nodiscard]] u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override; + + [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override; + + [[nodiscard]] u32 LocalMemorySize() const override; + + [[nodiscard]] u32 SharedMemorySize() const override; + + [[nodiscard]] u32 TextureBoundBuffer() const override; + + [[nodiscard]] std::array<u32, 3> WorkgroupSize() const override; + +private: + std::unique_ptr<u64[]> code; + std::unordered_map<u32, Shader::TextureType> texture_types; + std::unordered_map<u64, u32> cbuf_values; + std::array<u32, 3> workgroup_size{}; + u32 local_memory_size{}; + u32 shared_memory_size{}; + u32 texture_bound{}; + u32 read_lowest{}; + u32 read_highest{}; +}; + +void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs, + const std::filesystem::path& filename); + +template <typename Key, typename Envs> +void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename) { + static_assert(std::is_trivially_copyable_v<Key>); + static_assert(std::has_unique_object_representations_v<Key>); + SerializePipeline(std::span(reinterpret_cast<const char*>(&key), sizeof(key)), + std::span(envs.data(), envs.size()), filename); +} + +void LoadPipelines( + std::stop_token stop_loading, const std::filesystem::path& filename, + Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute, + Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics); + +} // namespace VideoCommon |