diff options
author | yzct12345 <87620833+yzct12345@users.noreply.github.com> | 2021-08-05 22:46:24 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-08-05 22:46:24 +0200 |
commit | 5566f3dbc0db1de41fcd291f5b7588d9e055ba85 (patch) | |
tree | af6c8f05b6af04eab31aa59cbc211abd557f0edf /src/video_core | |
parent | texture_cache: Split templates out (diff) | |
download | yuzu-5566f3dbc0db1de41fcd291f5b7588d9e055ba85.tar yuzu-5566f3dbc0db1de41fcd291f5b7588d9e055ba85.tar.gz yuzu-5566f3dbc0db1de41fcd291f5b7588d9e055ba85.tar.bz2 yuzu-5566f3dbc0db1de41fcd291f5b7588d9e055ba85.tar.lz yuzu-5566f3dbc0db1de41fcd291f5b7588d9e055ba85.tar.xz yuzu-5566f3dbc0db1de41fcd291f5b7588d9e055ba85.tar.zst yuzu-5566f3dbc0db1de41fcd291f5b7588d9e055ba85.zip |
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/CMakeLists.txt | 6 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 2 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.h | 2 | ||||
-rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache_base.cpp (renamed from src/video_core/renderer_opengl/gl_texture_cache_templates.cpp) | 2 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_rasterizer.cpp | 2 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.h | 2 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache_base.cpp (renamed from src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp) | 2 | ||||
-rw-r--r-- | src/video_core/texture_cache/image_view_info.cpp | 2 | ||||
-rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 1711 | ||||
-rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 402 | ||||
-rw-r--r-- | src/video_core/texture_cache/texture_cache_templates.h | 1507 |
12 files changed, 1821 insertions, 1821 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 1250cca6f..2f6cdd216 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -97,7 +97,7 @@ add_library(video_core STATIC renderer_opengl/gl_stream_buffer.h renderer_opengl/gl_texture_cache.cpp renderer_opengl/gl_texture_cache.h - renderer_opengl/gl_texture_cache_templates.cpp + renderer_opengl/gl_texture_cache_base.cpp renderer_opengl/gl_query_cache.cpp renderer_opengl/gl_query_cache.h renderer_opengl/maxwell_to_gl.h @@ -156,7 +156,7 @@ add_library(video_core STATIC renderer_vulkan/vk_swapchain.h renderer_vulkan/vk_texture_cache.cpp renderer_vulkan/vk_texture_cache.h - renderer_vulkan/vk_texture_cache_templates.cpp + renderer_vulkan/vk_texture_cache_base.cpp renderer_vulkan/vk_update_descriptor.cpp renderer_vulkan/vk_update_descriptor.h shader_cache.cpp @@ -188,7 +188,7 @@ add_library(video_core STATIC texture_cache/samples_helper.h texture_cache/slot_vector.h texture_cache/texture_cache.h - texture_cache/texture_cache_templates.h + texture_cache/texture_cache_base.h texture_cache/types.h texture_cache/util.cpp texture_cache/util.h diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index fac0034fb..bccb37a58 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -15,7 +15,7 @@ #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/shader_notify.h" -#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/texture_cache_base.h" #if defined(_MSC_VER) && defined(NDEBUG) #define LAMBDA_FORCEINLINE [[msvc::forceinline]] diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 41d2b73f4..b909c387e 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -32,7 +32,7 @@ #include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/shader_cache.h" -#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/texture_cache_base.h" namespace OpenGL { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 921072ebe..4a4f6301c 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -12,7 +12,7 @@ #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/util_shaders.h" -#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/texture_cache_base.h" namespace OpenGL { diff --git a/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp b/src/video_core/renderer_opengl/gl_texture_cache_base.cpp index 00ed06447..385358fea 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache_templates.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache_base.cpp @@ -3,7 +3,7 @@ // Refer to the license.txt file included. #include "video_core/renderer_opengl/gl_texture_cache.h" -#include "video_core/texture_cache/texture_cache_templates.h" +#include "video_core/texture_cache/texture_cache.h" namespace VideoCommon { template class VideoCommon::TextureCache<OpenGL::TextureCacheParams>; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 23cef2996..3ac18ea54 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -32,7 +32,7 @@ #include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/shader_cache.h" -#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/texture_cache_base.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 0b73d55f8..5fe6b7ba3 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -9,7 +9,7 @@ #include "shader_recompiler/shader_info.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" -#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/texture_cache_base.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_wrapper.h" diff --git a/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp b/src/video_core/renderer_vulkan/vk_texture_cache_base.cpp index fd8978954..44e688342 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache_templates.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache_base.cpp @@ -3,7 +3,7 @@ // Refer to the license.txt file included. #include "video_core/renderer_vulkan/vk_texture_cache.h" -#include "video_core/texture_cache/texture_cache_templates.h" +#include "video_core/texture_cache/texture_cache.h" namespace VideoCommon { template class VideoCommon::TextureCache<Vulkan::TextureCacheParams>; diff --git a/src/video_core/texture_cache/image_view_info.cpp b/src/video_core/texture_cache/image_view_info.cpp index faf5b151f..f14a92565 100644 --- a/src/video_core/texture_cache/image_view_info.cpp +++ b/src/video_core/texture_cache/image_view_info.cpp @@ -6,7 +6,7 @@ #include "common/assert.h" #include "video_core/texture_cache/image_view_info.h" -#include "video_core/texture_cache/texture_cache.h" +#include "video_core/texture_cache/texture_cache_base.h" #include "video_core/texture_cache/types.h" #include "video_core/textures/texture.h" diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a4f6e9422..5884fa16e 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -4,48 +4,7 @@ #pragma once -#include <algorithm> -#include <array> -#include <bit> -#include <memory> -#include <mutex> -#include <optional> -#include <span> -#include <type_traits> -#include <unordered_map> -#include <unordered_set> -#include <utility> -#include <vector> - -#include <boost/container/small_vector.hpp> - -#include "common/alignment.h" -#include "common/common_types.h" -#include "common/literals.h" -#include "common/logging/log.h" -#include "common/settings.h" -#include "video_core/compatible_formats.h" -#include "video_core/delayed_destruction_ring.h" -#include "video_core/dirty_flags.h" -#include "video_core/engines/fermi_2d.h" -#include "video_core/engines/kepler_compute.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/memory_manager.h" -#include "video_core/rasterizer_interface.h" -#include "video_core/surface.h" -#include "video_core/texture_cache/descriptor_table.h" -#include "video_core/texture_cache/format_lookup_table.h" -#include "video_core/texture_cache/formatter.h" -#include "video_core/texture_cache/image_base.h" -#include "video_core/texture_cache/image_info.h" -#include "video_core/texture_cache/image_view_base.h" -#include "video_core/texture_cache/image_view_info.h" -#include "video_core/texture_cache/render_targets.h" -#include "video_core/texture_cache/samples_helper.h" -#include "video_core/texture_cache/slot_vector.h" -#include "video_core/texture_cache/types.h" -#include "video_core/texture_cache/util.h" -#include "video_core/textures/texture.h" +#include "video_core/texture_cache/texture_cache_base.h" namespace VideoCommon { @@ -62,341 +21,1487 @@ using VideoCore::Surface::SurfaceType; using namespace Common::Literals; template <class P> -class TextureCache { - /// Address shift for caching images into a hash table - static constexpr u64 PAGE_BITS = 20; - - /// Enables debugging features to the texture cache - static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; - /// Implement blits as copies between framebuffers - static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; - /// True when some copies have to be emulated - static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; - /// True when the API can provide info about the memory of the device. - static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; - - /// Image view ID for null descriptors - static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; - /// Sampler ID for bugged sampler ids - static constexpr SamplerId NULL_SAMPLER_ID{0}; - - static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; - static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; - - using Runtime = typename P::Runtime; - using Image = typename P::Image; - using ImageAlloc = typename P::ImageAlloc; - using ImageView = typename P::ImageView; - using Sampler = typename P::Sampler; - using Framebuffer = typename P::Framebuffer; - - struct BlitImages { - ImageId dst_id; - ImageId src_id; - PixelFormat dst_format; - PixelFormat src_format; - }; +TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, + Tegra::Engines::Maxwell3D& maxwell3d_, + Tegra::Engines::KeplerCompute& kepler_compute_, + Tegra::MemoryManager& gpu_memory_) + : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { + // Configure null sampler + TSCEntry sampler_descriptor{}; + sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); + sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear); + sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); + sampler_descriptor.cubemap_anisotropy.Assign(1); + + // Make sure the first index is reserved for the null resources + // This way the null resource becomes a compile time constant + void(slot_image_views.insert(runtime, NullImageParams{})); + void(slot_samplers.insert(runtime, sampler_descriptor)); + + deletion_iterator = slot_images.begin(); + + if constexpr (HAS_DEVICE_MEMORY_INFO) { + const auto device_memory = runtime.GetDeviceLocalMemory(); + const u64 possible_expected_memory = (device_memory * 3) / 10; + const u64 possible_critical_memory = (device_memory * 6) / 10; + expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); + critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); + minimum_memory = 0; + } else { + // on OGL we can be more conservatives as the driver takes care. + expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; + critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; + minimum_memory = expected_memory; + } +} - template <typename T> - struct IdentityHash { - [[nodiscard]] size_t operator()(T value) const noexcept { - return static_cast<size_t>(value); +template <class P> +void TextureCache<P>::RunGarbageCollector() { + const bool high_priority_mode = total_used_memory >= expected_memory; + const bool aggressive_mode = total_used_memory >= critical_memory; + const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; + int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); + for (; num_iterations > 0; --num_iterations) { + if (deletion_iterator == slot_images.end()) { + deletion_iterator = slot_images.begin(); + if (deletion_iterator == slot_images.end()) { + break; + } } - }; - -public: - explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&, - Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&); - - /// Notify the cache that a new frame has been queued - void TickFrame(); - - /// Return a constant reference to the given image view id - [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; - - /// Return a reference to the given image view id - [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; - - /// Mark an image as modified from the GPU - void MarkModification(ImageId id) noexcept; - - /// Fill image_view_ids with the graphics images in indices - void FillGraphicsImageViews(std::span<const u32> indices, - std::span<ImageViewId> image_view_ids); + auto [image_id, image_tmp] = *deletion_iterator; + Image* image = image_tmp; // fix clang error. + const bool is_alias = True(image->flags & ImageFlagBits::Alias); + const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); + const bool must_download = image->IsSafeDownload(); + bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); + const u64 ticks_needed = + is_bad_overlap + ? ticks_to_destroy >> 4 + : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy); + should_care |= aggressive_mode; + if (should_care && image->frame_tick + ticks_needed < frame_tick) { + if (is_bad_overlap) { + const bool overlap_check = std::ranges::all_of( + image->overlapping_images, [&, image](const ImageId& overlap_id) { + auto& overlap = slot_images[overlap_id]; + return overlap.frame_tick >= image->frame_tick; + }); + if (!overlap_check) { + ++deletion_iterator; + continue; + } + } + if (!is_bad_overlap && must_download) { + const bool alias_check = std::ranges::none_of( + image->aliased_images, [&, image](const AliasedImage& alias) { + auto& alias_image = slot_images[alias.id]; + return (alias_image.frame_tick < image->frame_tick) || + (alias_image.modification_tick < image->modification_tick); + }); + + if (alias_check) { + auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image->info); + image->DownloadMemory(map, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); + } + } + if (True(image->flags & ImageFlagBits::Tracked)) { + UntrackImage(*image, image_id); + } + UnregisterImage(image_id); + DeleteImage(image_id); + if (is_bad_overlap) { + ++num_iterations; + } + } + ++deletion_iterator; + } +} - /// Fill image_view_ids with the compute images in indices - void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids); +template <class P> +void TextureCache<P>::TickFrame() { + if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { + RunGarbageCollector(); + } + sentenced_images.Tick(); + sentenced_framebuffers.Tick(); + sentenced_image_view.Tick(); + ++frame_tick; +} - /// Get the sampler from the graphics descriptor table in the specified index - Sampler* GetGraphicsSampler(u32 index); +template <class P> +const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept { + return slot_image_views[id]; +} - /// Get the sampler from the compute descriptor table in the specified index - Sampler* GetComputeSampler(u32 index); +template <class P> +typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept { + return slot_image_views[id]; +} - /// Refresh the state for graphics image view and sampler descriptors - void SynchronizeGraphicsDescriptors(); +template <class P> +void TextureCache<P>::MarkModification(ImageId id) noexcept { + MarkModification(slot_images[id]); +} - /// Refresh the state for compute image view and sampler descriptors - void SynchronizeComputeDescriptors(); +template <class P> +void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices, + std::span<ImageViewId> image_view_ids) { + FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); +} - /// Update bound render targets and upload memory if necessary - /// @param is_clear True when the render targets are being used for clears - void UpdateRenderTargets(bool is_clear); +template <class P> +void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices, + std::span<ImageViewId> image_view_ids) { + FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); +} - /// Find a framebuffer with the currently bound render targets - /// UpdateRenderTargets should be called before this - Framebuffer* GetFramebuffer(); +template <class P> +typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { + if (index > graphics_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); + return &slot_samplers[NULL_SAMPLER_ID]; + } + const auto [descriptor, is_new] = graphics_sampler_table.Read(index); + SamplerId& id = graphics_sampler_ids[index]; + if (is_new) { + id = FindSampler(descriptor); + } + return &slot_samplers[id]; +} - /// Mark images in a range as modified from the CPU - void WriteMemory(VAddr cpu_addr, size_t size); +template <class P> +typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { + if (index > compute_sampler_table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); + return &slot_samplers[NULL_SAMPLER_ID]; + } + const auto [descriptor, is_new] = compute_sampler_table.Read(index); + SamplerId& id = compute_sampler_ids[index]; + if (is_new) { + id = FindSampler(descriptor); + } + return &slot_samplers[id]; +} - /// Download contents of host images to guest memory in a region - void DownloadMemory(VAddr cpu_addr, size_t size); +template <class P> +void TextureCache<P>::SynchronizeGraphicsDescriptors() { + using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; + const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; + const u32 tic_limit = maxwell3d.regs.tic.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; + if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { + graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + } + if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { + graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + } +} - /// Remove images in a region - void UnmapMemory(VAddr cpu_addr, size_t size); +template <class P> +void TextureCache<P>::SynchronizeComputeDescriptors() { + const bool linked_tsc = kepler_compute.launch_description.linked_tsc; + const u32 tic_limit = kepler_compute.regs.tic.limit; + const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; + const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); + if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { + compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); + } + if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { + compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); + } +} - /// Remove images in a region - void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); +template <class P> +void TextureCache<P>::UpdateRenderTargets(bool is_clear) { + using namespace VideoCommon::Dirty; + auto& flags = maxwell3d.dirty.flags; + if (!flags[Dirty::RenderTargets]) { + for (size_t index = 0; index < NUM_RT; ++index) { + ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; + PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); + } + const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; + PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); + return; + } + flags[Dirty::RenderTargets] = false; - /// Blit an image with the given parameters - void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, - const Tegra::Engines::Fermi2D::Surface& src, - const Tegra::Engines::Fermi2D::Config& copy); + // Render target control is used on all render targets, so force look ups when this one is up + const bool force = flags[Dirty::RenderTargetControl]; + flags[Dirty::RenderTargetControl] = false; - /// Try to find a cached image view in the given CPU address - [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); + for (size_t index = 0; index < NUM_RT; ++index) { + ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; + if (flags[Dirty::ColorBuffer0 + index] || force) { + flags[Dirty::ColorBuffer0 + index] = false; + BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); + } + PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); + } + if (flags[Dirty::ZetaBuffer] || force) { + flags[Dirty::ZetaBuffer] = false; + BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); + } + const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; + PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); - /// Return true when there are uncommitted images to be downloaded - [[nodiscard]] bool HasUncommittedFlushes() const noexcept; + for (size_t index = 0; index < NUM_RT; ++index) { + render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index)); + } + render_targets.size = Extent2D{ + maxwell3d.regs.render_area.width, + maxwell3d.regs.render_area.height, + }; +} - /// Return true when the caller should wait for async downloads - [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; +template <class P> +typename P::Framebuffer* TextureCache<P>::GetFramebuffer() { + return &slot_framebuffers[GetFramebufferId(render_targets)]; +} - /// Commit asynchronous downloads - void CommitAsyncFlushes(); +template <class P> +void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table, + std::span<ImageViewId> cached_image_view_ids, + std::span<const u32> indices, + std::span<ImageViewId> image_view_ids) { + ASSERT(indices.size() <= image_view_ids.size()); + do { + has_deleted_images = false; + std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { + return VisitImageView(table, cached_image_view_ids, index); + }); + } while (has_deleted_images); +} - /// Pop asynchronous downloads - void PopAsyncFlushes(); +template <class P> +ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, + std::span<ImageViewId> cached_image_view_ids, + u32 index) { + if (index > table.Limit()) { + LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); + return NULL_IMAGE_VIEW_ID; + } + const auto [descriptor, is_new] = table.Read(index); + ImageViewId& image_view_id = cached_image_view_ids[index]; + if (is_new) { + image_view_id = FindImageView(descriptor); + } + if (image_view_id != NULL_IMAGE_VIEW_ID) { + PrepareImageView(image_view_id, false, false); + } + return image_view_id; +} - /// Return true when a CPU region is modified from the GPU - [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); +template <class P> +FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) { + const auto [pair, is_new] = framebuffers.try_emplace(key); + FramebufferId& framebuffer_id = pair->second; + if (!is_new) { + return framebuffer_id; + } + std::array<ImageView*, NUM_RT> color_buffers; + std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), + [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); + ImageView* const depth_buffer = + key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; + framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); + return framebuffer_id; +} - std::mutex mutex; +template <class P> +void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { + ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { + if (True(image.flags & ImageFlagBits::CpuModified)) { + return; + } + image.flags |= ImageFlagBits::CpuModified; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, image_id); + } + }); +} -private: - /// Iterate over all page indices in a range - template <typename Func> - static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { - static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; - const u64 page_end = (addr + size - 1) >> PAGE_BITS; - for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { - if constexpr (RETURNS_BOOL) { - if (func(page)) { - break; - } - } else { - func(page); - } +template <class P> +void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { + std::vector<ImageId> images; + ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { + if (!image.IsSafeDownload()) { + return; } + image.flags &= ~ImageFlagBits::GpuModified; + images.push_back(image_id); + }); + if (images.empty()) { + return; } + std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) { + return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; + }); + for (const ImageId image_id : images) { + Image& image = slot_images[image_id]; + auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(map, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); + } +} - template <typename Func> - static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) { - static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; - const u64 page_end = (addr + size - 1) >> PAGE_BITS; - for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { - if constexpr (RETURNS_BOOL) { - if (func(page)) { - break; - } - } else { - func(page); - } +template <class P> +void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { + std::vector<ImageId> deleted_images; + ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); + for (const ImageId id : deleted_images) { + Image& image = slot_images[id]; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, id); } + UnregisterImage(id); + DeleteImage(id); } +} - /// Runs the Garbage Collector. - void RunGarbageCollector(); - - /// Fills image_view_ids in the image views in indices - void FillImageViews(DescriptorTable<TICEntry>& table, - std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices, - std::span<ImageViewId> image_view_ids); - - /// Find or create an image view in the guest descriptor table - ImageViewId VisitImageView(DescriptorTable<TICEntry>& table, - std::span<ImageViewId> cached_image_view_ids, u32 index); - - /// Find or create a framebuffer with the given render target parameters - FramebufferId GetFramebufferId(const RenderTargets& key); +template <class P> +void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { + std::vector<ImageId> deleted_images; + ForEachImageInRegionGPU(gpu_addr, size, + [&](ImageId id, Image&) { deleted_images.push_back(id); }); + for (const ImageId id : deleted_images) { + Image& image = slot_images[id]; + if (True(image.flags & ImageFlagBits::Remapped)) { + continue; + } + image.flags |= ImageFlagBits::Remapped; + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, id); + } + } +} - /// Refresh the contents (pixel data) of an image - void RefreshContents(Image& image, ImageId image_id); +template <class P> +void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Config& copy) { + const BlitImages images = GetBlitImages(dst, src); + const ImageId dst_id = images.dst_id; + const ImageId src_id = images.src_id; + PrepareImage(src_id, false, false); + PrepareImage(dst_id, true, false); + + ImageBase& dst_image = slot_images[dst_id]; + const ImageBase& src_image = slot_images[src_id]; + + // TODO: Deduplicate + const std::optional src_base = src_image.TryFindBase(src.Address()); + const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; + const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); + const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); + const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); + const Region2D src_region{ + Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, + Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, + }; - /// Upload data from guest to an image - template <typename StagingBuffer> - void UploadImageContents(Image& image, StagingBuffer& staging_buffer); + const std::optional dst_base = dst_image.TryFindBase(dst.Address()); + const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; + const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); + const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); + const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); + const Region2D dst_region{ + Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, + Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, + }; - /// Find or create an image view from a guest descriptor - [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); + // Always call this after src_framebuffer_id was queried, as the address might be invalidated. + Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; + if constexpr (FRAMEBUFFER_BLITS) { + // OpenGL blits from framebuffers, not images + Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id]; + runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region, + copy.filter, copy.operation); + } else { + // Vulkan can blit images, but it lacks format reinterpretations + // Provide a framebuffer in case it's necessary + ImageView& dst_view = slot_image_views[dst_view_id]; + ImageView& src_view = slot_image_views[src_view_id]; + runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, + copy.operation); + } +} - /// Create a new image view from a guest descriptor - [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config); +template <class P> +typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) { + // TODO: Properly implement this + const auto it = page_table.find(cpu_addr >> PAGE_BITS); + if (it == page_table.end()) { + return nullptr; + } + const auto& image_map_ids = it->second; + for (const ImageMapId map_id : image_map_ids) { + const ImageMapView& map = slot_map_views[map_id]; + const ImageBase& image = slot_images[map.image_id]; + if (image.cpu_addr != cpu_addr) { + continue; + } + if (image.image_view_ids.empty()) { + continue; + } + return &slot_image_views[image.image_view_ids.at(0)]; + } + return nullptr; +} - /// Find or create an image from the given parameters - [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options = RelaxedOptions{}); +template <class P> +bool TextureCache<P>::HasUncommittedFlushes() const noexcept { + return !uncommitted_downloads.empty(); +} - /// Find an image from the given parameters - [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options); +template <class P> +bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept { + return !committed_downloads.empty() && !committed_downloads.front().empty(); +} - /// Create an image from the given parameters - [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options); +template <class P> +void TextureCache<P>::CommitAsyncFlushes() { + // This is intentionally passing the value by copy + committed_downloads.push(uncommitted_downloads); + uncommitted_downloads.clear(); +} - /// Create a new image and join perfectly matching existing images - /// Remove joined images from the cache - [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); +template <class P> +void TextureCache<P>::PopAsyncFlushes() { + if (committed_downloads.empty()) { + return; + } + const std::span<const ImageId> download_ids = committed_downloads.front(); + if (download_ids.empty()) { + committed_downloads.pop(); + return; + } + size_t total_size_bytes = 0; + for (const ImageId image_id : download_ids) { + total_size_bytes += slot_images[image_id].unswizzled_size_bytes; + } + auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); + const size_t original_offset = download_map.offset; + for (const ImageId image_id : download_ids) { + Image& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(download_map, copies); + download_map.offset += image.unswizzled_size_bytes; + } + // Wait for downloads to finish + runtime.Finish(); + + download_map.offset = original_offset; + std::span<u8> download_span = download_map.mapped_span; + for (const ImageId image_id : download_ids) { + const ImageBase& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); + download_map.offset += image.unswizzled_size_bytes; + download_span = download_span.subspan(image.unswizzled_size_bytes); + } + committed_downloads.pop(); +} - /// Return a blit image pair from the given guest blit parameters - [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, - const Tegra::Engines::Fermi2D::Surface& src); +template <class P> +bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { + bool is_modified = false; + ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { + if (False(image.flags & ImageFlagBits::GpuModified)) { + return false; + } + is_modified = true; + return true; + }); + return is_modified; +} - /// Find or create a sampler from a guest descriptor sampler - [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); +template <class P> +void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { + if (False(image.flags & ImageFlagBits::CpuModified)) { + // Only upload modified images + return; + } + image.flags &= ~ImageFlagBits::CpuModified; + TrackImage(image, image_id); - /// Find or create an image view for the given color buffer index - [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); + if (image.info.num_samples > 1) { + LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); + return; + } + auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); + UploadImageContents(image, staging); + runtime.InsertUploadMemoryBarrier(); +} - /// Find or create an image view for the depth buffer - [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); +template <class P> +template <typename StagingBuffer> +void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) { + const std::span<u8> mapped_span = staging.mapped_span; + const GPUVAddr gpu_addr = image.gpu_addr; + + if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { + gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); + const auto uploads = FullUploadSwizzles(image.info); + runtime.AccelerateImageUpload(image, staging, uploads); + } else if (True(image.flags & ImageFlagBits::Converted)) { + std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); + auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); + ConvertImage(unswizzled_data, image.info, mapped_span, copies); + image.UploadMemory(staging, copies); + } else { + const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); + image.UploadMemory(staging, copies); + } +} - /// Find or create a view for a render target with the given image parameters - [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, - bool is_clear); +template <class P> +ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { + if (!IsValidEntry(gpu_memory, config)) { + return NULL_IMAGE_VIEW_ID; + } + const auto [pair, is_new] = image_views.try_emplace(config); + ImageViewId& image_view_id = pair->second; + if (is_new) { + image_view_id = CreateImageView(config); + } + return image_view_id; +} - /// Iterates over all the images in a region calling func - template <typename Func> - void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); +template <class P> +ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) { + const ImageInfo info(config); + if (info.type == ImageType::Buffer) { + const ImageViewInfo view_info(config, 0); + return slot_image_views.insert(runtime, info, view_info, config.Address()); + } + const u32 layer_offset = config.BaseLayer() * info.layer_stride; + const GPUVAddr image_gpu_addr = config.Address() - layer_offset; + const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); + if (!image_id) { + return NULL_IMAGE_VIEW_ID; + } + ImageBase& image = slot_images[image_id]; + const SubresourceBase base = image.TryFindBase(config.Address()).value(); + ASSERT(base.level == 0); + const ImageViewInfo view_info(config, base.layer); + const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); + ImageViewBase& image_view = slot_image_views[image_view_id]; + image_view.flags |= ImageViewFlagBits::Strong; + image.flags |= ImageFlagBits::Strong; + return image_view_id; +} - template <typename Func> - void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func); +template <class P> +ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { + if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) { + return image_id; + } + return InsertImage(info, gpu_addr, options); +} - template <typename Func> - void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func); +template <class P> +ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { + std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); + if (!cpu_addr) { + return ImageId{}; + } + } + const bool broken_views = runtime.HasBrokenTextureViewFormats(); + const bool native_bgr = runtime.HasNativeBgr(); + ImageId image_id; + const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { + if (True(existing_image.flags & ImageFlagBits::Remapped)) { + return false; + } + if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { + const bool strict_size = False(options & RelaxedOptions::Size) && + True(existing_image.flags & ImageFlagBits::Strong); + const ImageInfo& existing = existing_image.info; + if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && + existing.pitch == info.pitch && + IsPitchLinearSameSize(existing, info, strict_size) && + IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) { + image_id = existing_image_id; + return true; + } + } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views, + native_bgr)) { + image_id = existing_image_id; + return true; + } + return false; + }; + ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); + return image_id; +} - /// Iterates over all the images in a region calling func - template <typename Func> - void ForEachSparseSegment(ImageBase& image, Func&& func); +template <class P> +ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options) { + std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + if (!cpu_addr) { + const auto size = CalculateGuestSizeInBytes(info); + cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); + if (!cpu_addr) { + const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; + virtual_invalid_space += Common::AlignUp(size, 32); + cpu_addr = std::optional<VAddr>(fake_addr); + } + } + ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); + const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); + const Image& image = slot_images[image_id]; + // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different + const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr); + if (is_new) { + it->second = slot_image_allocs.insert(); + } + slot_image_allocs[it->second].images.push_back(image_id); + return image_id; +} - /// Find or create an image view in the given image with the passed parameters - [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); +template <class P> +ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { + ImageInfo new_info = info; + const size_t size_bytes = CalculateGuestSizeInBytes(new_info); + const bool broken_views = runtime.HasBrokenTextureViewFormats(); + const bool native_bgr = runtime.HasNativeBgr(); + std::vector<ImageId> overlap_ids; + std::unordered_set<ImageId> overlaps_found; + std::vector<ImageId> left_aliased_ids; + std::vector<ImageId> right_aliased_ids; + std::unordered_set<ImageId> ignore_textures; + std::vector<ImageId> bad_overlap_ids; + const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { + if (True(overlap.flags & ImageFlagBits::Remapped)) { + ignore_textures.insert(overlap_id); + return; + } + if (info.type == ImageType::Linear) { + if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { + // Alias linear images with the same pitch + left_aliased_ids.push_back(overlap_id); + } + return; + } + overlaps_found.insert(overlap_id); + static constexpr bool strict_size = true; + const std::optional<OverlapResult> solution = ResolveOverlap( + new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); + if (solution) { + gpu_addr = solution->gpu_addr; + cpu_addr = solution->cpu_addr; + new_info.resources = solution->resources; + overlap_ids.push_back(overlap_id); + return; + } + static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; + const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); + if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { + left_aliased_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::Alias; + } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, + broken_views, native_bgr)) { + right_aliased_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::Alias; + } else { + bad_overlap_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::BadOverlap; + } + }; + ForEachImageInRegion(cpu_addr, size_bytes, region_check); + const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) { + if (!overlaps_found.contains(overlap_id)) { + if (True(overlap.flags & ImageFlagBits::Remapped)) { + ignore_textures.insert(overlap_id); + } + if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { + ignore_textures.insert(overlap_id); + } + } + }; + ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); + const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); + Image& new_image = slot_images[new_image_id]; - /// Register image in the page table - void RegisterImage(ImageId image); + if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { + new_image.flags |= ImageFlagBits::Sparse; + } - /// Unregister image from the page table - void UnregisterImage(ImageId image); + for (const ImageId overlap_id : ignore_textures) { + Image& overlap = slot_images[overlap_id]; + if (True(overlap.flags & ImageFlagBits::GpuModified)) { + UNIMPLEMENTED(); + } + if (True(overlap.flags & ImageFlagBits::Tracked)) { + UntrackImage(overlap, overlap_id); + } + UnregisterImage(overlap_id); + DeleteImage(overlap_id); + } - /// Track CPU reads and writes for image - void TrackImage(ImageBase& image, ImageId image_id); + // TODO: Only upload what we need + RefreshContents(new_image, new_image_id); + + for (const ImageId overlap_id : overlap_ids) { + Image& overlap = slot_images[overlap_id]; + if (overlap.info.num_samples != new_image.info.num_samples) { + LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); + } else { + const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); + const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); + runtime.CopyImage(new_image, overlap, copies); + } + if (True(overlap.flags & ImageFlagBits::Tracked)) { + UntrackImage(overlap, overlap_id); + } + UnregisterImage(overlap_id); + DeleteImage(overlap_id); + } + ImageBase& new_image_base = new_image; + for (const ImageId aliased_id : right_aliased_ids) { + ImageBase& aliased = slot_images[aliased_id]; + AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); + new_image.flags |= ImageFlagBits::Alias; + } + for (const ImageId aliased_id : left_aliased_ids) { + ImageBase& aliased = slot_images[aliased_id]; + AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); + new_image.flags |= ImageFlagBits::Alias; + } + for (const ImageId aliased_id : bad_overlap_ids) { + ImageBase& aliased = slot_images[aliased_id]; + aliased.overlapping_images.push_back(new_image_id); + new_image.overlapping_images.push_back(aliased_id); + new_image.flags |= ImageFlagBits::BadOverlap; + } + RegisterImage(new_image_id); + return new_image_id; +} - /// Stop tracking CPU reads and writes for image - void UntrackImage(ImageBase& image, ImageId image_id); +template <class P> +typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( + const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { + static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; + const GPUVAddr dst_addr = dst.Address(); + const GPUVAddr src_addr = src.Address(); + ImageInfo dst_info(dst); + ImageInfo src_info(src); + ImageId dst_id; + ImageId src_id; + do { + has_deleted_images = false; + dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); + src_id = FindImage(src_info, src_addr, FIND_OPTIONS); + const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; + const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; + DeduceBlitImages(dst_info, src_info, dst_image, src_image); + if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { + continue; + } + if (!dst_id) { + dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); + } + if (!src_id) { + src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); + } + } while (has_deleted_images); + return BlitImages{ + .dst_id = dst_id, + .src_id = src_id, + .dst_format = dst_info.format, + .src_format = src_info.format, + }; +} - /// Delete image from the cache - void DeleteImage(ImageId image); +template <class P> +SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { + if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { + return NULL_SAMPLER_ID; + } + const auto [pair, is_new] = samplers.try_emplace(config); + if (is_new) { + pair->second = slot_samplers.insert(runtime, config); + } + return pair->second; +} - /// Remove image views references from the cache - void RemoveImageViewReferences(std::span<const ImageViewId> removed_views); +template <class P> +ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { + const auto& regs = maxwell3d.regs; + if (index >= regs.rt_control.count) { + return ImageViewId{}; + } + const auto& rt = regs.rt[index]; + const GPUVAddr gpu_addr = rt.Address(); + if (gpu_addr == 0) { + return ImageViewId{}; + } + if (rt.format == Tegra::RenderTargetFormat::NONE) { + return ImageViewId{}; + } + const ImageInfo info(regs, index); + return FindRenderTargetView(info, gpu_addr, is_clear); +} - /// Remove framebuffers using the given image views from the cache - void RemoveFramebuffers(std::span<const ImageViewId> removed_views); +template <class P> +ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { + const auto& regs = maxwell3d.regs; + if (!regs.zeta_enable) { + return ImageViewId{}; + } + const GPUVAddr gpu_addr = regs.zeta.Address(); + if (gpu_addr == 0) { + return ImageViewId{}; + } + const ImageInfo info(regs); + return FindRenderTargetView(info, gpu_addr, is_clear); +} - /// Mark an image as modified from the GPU - void MarkModification(ImageBase& image) noexcept; +template <class P> +ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, + bool is_clear) { + const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; + const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); + if (!image_id) { + return NULL_IMAGE_VIEW_ID; + } + Image& image = slot_images[image_id]; + const ImageViewType view_type = RenderTargetImageViewType(info); + SubresourceBase base; + if (image.info.type == ImageType::Linear) { + base = SubresourceBase{.level = 0, .layer = 0}; + } else { + base = image.TryFindBase(gpu_addr).value(); + } + const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers; + const SubresourceRange range{ + .base = base, + .extent = {.levels = 1, .layers = layers}, + }; + return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range)); +} - /// Synchronize image aliases, copying data if needed - void SynchronizeAliases(ImageId image_id); +template <class P> +template <typename Func> +void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; + static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; + boost::container::small_vector<ImageId, 32> images; + boost::container::small_vector<ImageMapId, 32> maps; + ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) { + const auto it = page_table.find(page); + if (it == page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const ImageMapId map_id : it->second) { + ImageMapView& map = slot_map_views[map_id]; + if (map.picked) { + continue; + } + if (!map.Overlaps(cpu_addr, size)) { + continue; + } + map.picked = true; + maps.push_back(map_id); + Image& image = slot_images[map.image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(map.image_id); + if constexpr (BOOL_BREAK) { + if (func(map.image_id, image)) { + return true; + } + } else { + func(map.image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } + for (const ImageMapId map_id : maps) { + slot_map_views[map_id].picked = false; + } +} - /// Prepare an image to be used - void PrepareImage(ImageId image_id, bool is_modification, bool invalidate); +template <class P> +template <typename Func> +void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; + static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; + boost::container::small_vector<ImageId, 8> images; + ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { + const auto it = gpu_page_table.find(page); + if (it == gpu_page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const ImageId image_id : it->second) { + Image& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + if (!image.OverlapsGPU(gpu_addr, size)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(image_id); + if constexpr (BOOL_BREAK) { + if (func(image_id, image)) { + return true; + } + } else { + func(image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } +} - /// Prepare an image view to be used - void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); +template <class P> +template <typename Func> +void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) { + using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; + static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; + boost::container::small_vector<ImageId, 8> images; + ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { + const auto it = sparse_page_table.find(page); + if (it == sparse_page_table.end()) { + if constexpr (BOOL_BREAK) { + return false; + } else { + return; + } + } + for (const ImageId image_id : it->second) { + Image& image = slot_images[image_id]; + if (True(image.flags & ImageFlagBits::Picked)) { + continue; + } + if (!image.OverlapsGPU(gpu_addr, size)) { + continue; + } + image.flags |= ImageFlagBits::Picked; + images.push_back(image_id); + if constexpr (BOOL_BREAK) { + if (func(image_id, image)) { + return true; + } + } else { + func(image_id, image); + } + } + if constexpr (BOOL_BREAK) { + return false; + } + }); + for (const ImageId image_id : images) { + slot_images[image_id].flags &= ~ImageFlagBits::Picked; + } +} - /// Execute copies from one image to the other, even if they are incompatible - void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies); +template <class P> +template <typename Func> +void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { + using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; + static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; + const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); + for (auto& segment : segments) { + const auto gpu_addr = segment.first; + const auto size = segment.second; + std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr); + if constexpr (RETURNS_BOOL) { + if (func(gpu_addr, *cpu_addr, size)) { + break; + } + } else { + func(gpu_addr, *cpu_addr, size); + } + } +} - /// Bind an image view as render target, downloading resources preemtively if needed - void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); +template <class P> +ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { + Image& image = slot_images[image_id]; + if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { + return image_view_id; + } + const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); + image.InsertView(info, image_view_id); + return image_view_id; +} - /// Create a render target from a given image and image view parameters - [[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage( - ImageId, const ImageViewInfo& view_info); +template <class P> +void TextureCache<P>::RegisterImage(ImageId image_id) { + ImageBase& image = slot_images[image_id]; + ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), + "Trying to register an already registered image"); + image.flags |= ImageFlagBits::Registered; + u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + if ((IsPixelFormatASTC(image.info.format) && + True(image.flags & ImageFlagBits::AcceleratedUpload)) || + True(image.flags & ImageFlagBits::Converted)) { + tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); + } + total_used_memory += Common::AlignUp(tentative_size, 1024); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); + if (False(image.flags & ImageFlagBits::Sparse)) { + auto map_id = + slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); + ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, + [this, map_id](u64 page) { page_table[page].push_back(map_id); }); + image.map_view_id = map_id; + return; + } + std::vector<ImageViewId> sparse_maps{}; + ForEachSparseSegment( + image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { + auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); + ForEachCPUPage(cpu_addr, size, + [this, map_id](u64 page) { page_table[page].push_back(map_id); }); + sparse_maps.push_back(map_id); + }); + sparse_views.emplace(image_id, std::move(sparse_maps)); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); +} - /// Returns true if the current clear parameters clear the whole image of a given image view - [[nodiscard]] bool IsFullClear(ImageViewId id); +template <class P> +void TextureCache<P>::UnregisterImage(ImageId image_id) { + Image& image = slot_images[image_id]; + ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), + "Trying to unregister an already registered image"); + image.flags &= ~ImageFlagBits::Registered; + image.flags &= ~ImageFlagBits::BadOverlap; + u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + if ((IsPixelFormatASTC(image.info.format) && + True(image.flags & ImageFlagBits::AcceleratedUpload)) || + True(image.flags & ImageFlagBits::Converted)) { + tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); + } + total_used_memory -= Common::AlignUp(tentative_size, 1024); + const auto& clear_page_table = + [this, image_id]( + u64 page, + std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) { + const auto page_it = selected_page_table.find(page); + if (page_it == selected_page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector<ImageId>& image_ids = page_it->second; + const auto vector_it = std::ranges::find(image_ids, image_id); + if (vector_it == image_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", + page << PAGE_BITS); + return; + } + image_ids.erase(vector_it); + }; + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, + [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); + if (False(image.flags & ImageFlagBits::Sparse)) { + const auto map_id = image.map_view_id; + ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector<ImageMapId>& image_map_ids = page_it->second; + const auto vector_it = std::ranges::find(image_map_ids, map_id); + if (vector_it == image_map_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", + page << PAGE_BITS); + return; + } + image_map_ids.erase(vector_it); + }); + slot_map_views.erase(map_id); + return; + } + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { + clear_page_table(page, sparse_page_table); + }); + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map_range = slot_map_views[map_view_id]; + const VAddr cpu_addr = map_range.cpu_addr; + const std::size_t size = map_range.size; + ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector<ImageMapId>& image_map_ids = page_it->second; + auto vector_it = image_map_ids.begin(); + while (vector_it != image_map_ids.end()) { + ImageMapView& map = slot_map_views[*vector_it]; + if (map.image_id != image_id) { + vector_it++; + continue; + } + if (!map.picked) { + map.picked = true; + } + vector_it = image_map_ids.erase(vector_it); + } + }); + slot_map_views.erase(map_view_id); + } + sparse_views.erase(it); +} - Runtime& runtime; - VideoCore::RasterizerInterface& rasterizer; - Tegra::Engines::Maxwell3D& maxwell3d; - Tegra::Engines::KeplerCompute& kepler_compute; - Tegra::MemoryManager& gpu_memory; +template <class P> +void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { + ASSERT(False(image.flags & ImageFlagBits::Tracked)); + image.flags |= ImageFlagBits::Tracked; + if (False(image.flags & ImageFlagBits::Sparse)) { + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); + return; + } + if (True(image.flags & ImageFlagBits::Registered)) { + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map = slot_map_views[map_view_id]; + const VAddr cpu_addr = map.cpu_addr; + const std::size_t size = map.size; + rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); + } + return; + } + ForEachSparseSegment(image, + [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { + rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); + }); +} - DescriptorTable<TICEntry> graphics_image_table{gpu_memory}; - DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory}; - std::vector<SamplerId> graphics_sampler_ids; - std::vector<ImageViewId> graphics_image_view_ids; +template <class P> +void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { + ASSERT(True(image.flags & ImageFlagBits::Tracked)); + image.flags &= ~ImageFlagBits::Tracked; + if (False(image.flags & ImageFlagBits::Sparse)) { + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); + return; + } + ASSERT(True(image.flags & ImageFlagBits::Registered)); + auto it = sparse_views.find(image_id); + ASSERT(it != sparse_views.end()); + auto& sparse_maps = it->second; + for (auto& map_view_id : sparse_maps) { + const auto& map = slot_map_views[map_view_id]; + const VAddr cpu_addr = map.cpu_addr; + const std::size_t size = map.size; + rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); + } +} - DescriptorTable<TICEntry> compute_image_table{gpu_memory}; - DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory}; - std::vector<SamplerId> compute_sampler_ids; - std::vector<ImageViewId> compute_image_view_ids; +template <class P> +void TextureCache<P>::DeleteImage(ImageId image_id) { + ImageBase& image = slot_images[image_id]; + const GPUVAddr gpu_addr = image.gpu_addr; + const auto alloc_it = image_allocs_table.find(gpu_addr); + if (alloc_it == image_allocs_table.end()) { + UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}", + gpu_addr); + return; + } + const ImageAllocId alloc_id = alloc_it->second; + std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images; + const auto alloc_image_it = std::ranges::find(alloc_images, image_id); + if (alloc_image_it == alloc_images.end()) { + UNREACHABLE_MSG("Trying to delete an image that does not exist"); + return; + } + ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); + ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); + + // Mark render targets as dirty + auto& dirty = maxwell3d.dirty.flags; + dirty[Dirty::RenderTargets] = true; + dirty[Dirty::ZetaBuffer] = true; + for (size_t rt = 0; rt < NUM_RT; ++rt) { + dirty[Dirty::ColorBuffer0 + rt] = true; + } + const std::span<const ImageViewId> image_view_ids = image.image_view_ids; + for (const ImageViewId image_view_id : image_view_ids) { + std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); + if (render_targets.depth_buffer_id == image_view_id) { + render_targets.depth_buffer_id = ImageViewId{}; + } + } + RemoveImageViewReferences(image_view_ids); + RemoveFramebuffers(image_view_ids); + + for (const AliasedImage& alias : image.aliased_images) { + ImageBase& other_image = slot_images[alias.id]; + [[maybe_unused]] const size_t num_removed_aliases = + std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { + return other_alias.id == image_id; + }); + other_image.CheckAliasState(); + ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", + num_removed_aliases); + } + for (const ImageId overlap_id : image.overlapping_images) { + ImageBase& other_image = slot_images[overlap_id]; + [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if( + other_image.overlapping_images, + [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; }); + other_image.CheckBadOverlapState(); + ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}", + num_removed_overlaps); + } + for (const ImageViewId image_view_id : image_view_ids) { + sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); + slot_image_views.erase(image_view_id); + } + sentenced_images.Push(std::move(slot_images[image_id])); + slot_images.erase(image_id); - RenderTargets render_targets; + alloc_images.erase(alloc_image_it); + if (alloc_images.empty()) { + image_allocs_table.erase(alloc_it); + } + if constexpr (ENABLE_VALIDATION) { + std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); + std::ranges::fill(compute_image_view_ids, CORRUPT_ID); + } + graphics_image_table.Invalidate(); + compute_image_table.Invalidate(); + has_deleted_images = true; +} - std::unordered_map<TICEntry, ImageViewId> image_views; - std::unordered_map<TSCEntry, SamplerId> samplers; - std::unordered_map<RenderTargets, FramebufferId> framebuffers; +template <class P> +void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) { + auto it = image_views.begin(); + while (it != image_views.end()) { + const auto found = std::ranges::find(removed_views, it->second); + if (found != removed_views.end()) { + it = image_views.erase(it); + } else { + ++it; + } + } +} - std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table; - std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table; - std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table; +template <class P> +void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) { + auto it = framebuffers.begin(); + while (it != framebuffers.end()) { + if (it->first.Contains(removed_views)) { + it = framebuffers.erase(it); + } else { + ++it; + } + } +} - std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; +template <class P> +void TextureCache<P>::MarkModification(ImageBase& image) noexcept { + image.flags |= ImageFlagBits::GpuModified; + image.modification_tick = ++modification_tick; +} - VAddr virtual_invalid_space{}; +template <class P> +void TextureCache<P>::SynchronizeAliases(ImageId image_id) { + boost::container::small_vector<const AliasedImage*, 1> aliased_images; + ImageBase& image = slot_images[image_id]; + u64 most_recent_tick = image.modification_tick; + for (const AliasedImage& aliased : image.aliased_images) { + ImageBase& aliased_image = slot_images[aliased.id]; + if (image.modification_tick < aliased_image.modification_tick) { + most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); + aliased_images.push_back(&aliased); + } + } + if (aliased_images.empty()) { + return; + } + image.modification_tick = most_recent_tick; + std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { + const ImageBase& lhs_image = slot_images[lhs->id]; + const ImageBase& rhs_image = slot_images[rhs->id]; + return lhs_image.modification_tick < rhs_image.modification_tick; + }); + for (const AliasedImage* const aliased : aliased_images) { + CopyImage(image_id, aliased->id, aliased->copies); + } +} - bool has_deleted_images = false; - u64 total_used_memory = 0; - u64 minimum_memory; - u64 expected_memory; - u64 critical_memory; +template <class P> +void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { + Image& image = slot_images[image_id]; + if (invalidate) { + image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); + if (False(image.flags & ImageFlagBits::Tracked)) { + TrackImage(image, image_id); + } + } else { + RefreshContents(image, image_id); + SynchronizeAliases(image_id); + } + if (is_modification) { + MarkModification(image); + } + image.frame_tick = frame_tick; +} - SlotVector<Image> slot_images; - SlotVector<ImageMapView> slot_map_views; - SlotVector<ImageView> slot_image_views; - SlotVector<ImageAlloc> slot_image_allocs; - SlotVector<Sampler> slot_samplers; - SlotVector<Framebuffer> slot_framebuffers; +template <class P> +void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification, + bool invalidate) { + if (!image_view_id) { + return; + } + const ImageViewBase& image_view = slot_image_views[image_view_id]; + if (image_view.IsBuffer()) { + return; + } + PrepareImage(image_view.image_id, is_modification, invalidate); +} - // TODO: This data structure is not optimal and it should be reworked - std::vector<ImageId> uncommitted_downloads; - std::queue<std::vector<ImageId>> committed_downloads; +template <class P> +void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) { + Image& dst = slot_images[dst_id]; + Image& src = slot_images[src_id]; + const auto dst_format_type = GetFormatType(dst.info.format); + const auto src_format_type = GetFormatType(src.info.format); + if (src_format_type == dst_format_type) { + if constexpr (HAS_EMULATED_COPIES) { + if (!runtime.CanImageBeCopied(dst, src)) { + return runtime.EmulateCopyImage(dst, src, copies); + } + } + return runtime.CopyImage(dst, src, copies); + } + UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); + UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); + for (const ImageCopy& copy : copies) { + UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); + UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); + UNIMPLEMENTED_IF(copy.src_offset != Offset3D{}); + UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{}); + + const SubresourceBase dst_base{ + .level = copy.dst_subresource.base_level, + .layer = copy.dst_subresource.base_layer, + }; + const SubresourceBase src_base{ + .level = copy.src_subresource.base_level, + .layer = copy.src_subresource.base_layer, + }; + const SubresourceExtent dst_extent{.levels = 1, .layers = 1}; + const SubresourceExtent src_extent{.levels = 1, .layers = 1}; + const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; + const SubresourceRange src_range{.base = src_base, .extent = src_extent}; + const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); + const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); + const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); + Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; + const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info); + ImageView& dst_view = slot_image_views[dst_view_id]; + ImageView& src_view = slot_image_views[src_view_id]; + [[maybe_unused]] const Extent3D expected_size{ + .width = std::min(dst_view.size.width, src_view.size.width), + .height = std::min(dst_view.size.height, src_view.size.height), + .depth = std::min(dst_view.size.depth, src_view.size.depth), + }; + UNIMPLEMENTED_IF(copy.extent != expected_size); + + runtime.ConvertImage(dst_framebuffer, dst_view, src_view); + } +} - static constexpr size_t TICKS_TO_DESTROY = 6; - DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images; - DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view; - DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers; +template <class P> +void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) { + if (*old_id == new_id) { + return; + } + if (*old_id) { + const ImageViewBase& old_view = slot_image_views[*old_id]; + if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { + uncommitted_downloads.push_back(old_view.image_id); + } + } + *old_id = new_id; +} - std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table; +template <class P> +std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage( + ImageId image_id, const ImageViewInfo& view_info) { + const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); + const ImageBase& image = slot_images[image_id]; + const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; + const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; + const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; + const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); + const u32 num_samples = image.info.num_samples; + const auto [samples_x, samples_y] = SamplesLog2(num_samples); + const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ + .color_buffer_ids = {color_view_id}, + .depth_buffer_id = depth_view_id, + .size = {extent.width >> samples_x, extent.height >> samples_y}, + }); + return {framebuffer_id, view_id}; +} - u64 modification_tick = 0; - u64 frame_tick = 0; - typename SlotVector<Image>::Iterator deletion_iterator; -}; +template <class P> +bool TextureCache<P>::IsFullClear(ImageViewId id) { + if (!id) { + return true; + } + const ImageViewBase& image_view = slot_image_views[id]; + const ImageBase& image = slot_images[image_view.image_id]; + const Extent3D size = image_view.size; + const auto& regs = maxwell3d.regs; + const auto& scissor = regs.scissor_test[0]; + if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { + // Images with multiple resources can't be cleared in a single call + return false; + } + if (regs.clear_flags.scissor == 0) { + // If scissor testing is disabled, the clear is always full + return true; + } + // Make sure the clear covers all texels in the subresource + return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width && + scissor.max_y >= size.height; +} } // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h new file mode 100644 index 000000000..a4f6e9422 --- /dev/null +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -0,0 +1,402 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <algorithm> +#include <array> +#include <bit> +#include <memory> +#include <mutex> +#include <optional> +#include <span> +#include <type_traits> +#include <unordered_map> +#include <unordered_set> +#include <utility> +#include <vector> + +#include <boost/container/small_vector.hpp> + +#include "common/alignment.h" +#include "common/common_types.h" +#include "common/literals.h" +#include "common/logging/log.h" +#include "common/settings.h" +#include "video_core/compatible_formats.h" +#include "video_core/delayed_destruction_ring.h" +#include "video_core/dirty_flags.h" +#include "video_core/engines/fermi_2d.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/surface.h" +#include "video_core/texture_cache/descriptor_table.h" +#include "video_core/texture_cache/format_lookup_table.h" +#include "video_core/texture_cache/formatter.h" +#include "video_core/texture_cache/image_base.h" +#include "video_core/texture_cache/image_info.h" +#include "video_core/texture_cache/image_view_base.h" +#include "video_core/texture_cache/image_view_info.h" +#include "video_core/texture_cache/render_targets.h" +#include "video_core/texture_cache/samples_helper.h" +#include "video_core/texture_cache/slot_vector.h" +#include "video_core/texture_cache/types.h" +#include "video_core/texture_cache/util.h" +#include "video_core/textures/texture.h" + +namespace VideoCommon { + +using Tegra::Texture::SwizzleSource; +using Tegra::Texture::TextureType; +using Tegra::Texture::TICEntry; +using Tegra::Texture::TSCEntry; +using VideoCore::Surface::GetFormatType; +using VideoCore::Surface::IsCopyCompatible; +using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::PixelFormatFromDepthFormat; +using VideoCore::Surface::PixelFormatFromRenderTargetFormat; +using VideoCore::Surface::SurfaceType; +using namespace Common::Literals; + +template <class P> +class TextureCache { + /// Address shift for caching images into a hash table + static constexpr u64 PAGE_BITS = 20; + + /// Enables debugging features to the texture cache + static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION; + /// Implement blits as copies between framebuffers + static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; + /// True when some copies have to be emulated + static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; + /// True when the API can provide info about the memory of the device. + static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; + + /// Image view ID for null descriptors + static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; + /// Sampler ID for bugged sampler ids + static constexpr SamplerId NULL_SAMPLER_ID{0}; + + static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB; + static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB; + + using Runtime = typename P::Runtime; + using Image = typename P::Image; + using ImageAlloc = typename P::ImageAlloc; + using ImageView = typename P::ImageView; + using Sampler = typename P::Sampler; + using Framebuffer = typename P::Framebuffer; + + struct BlitImages { + ImageId dst_id; + ImageId src_id; + PixelFormat dst_format; + PixelFormat src_format; + }; + + template <typename T> + struct IdentityHash { + [[nodiscard]] size_t operator()(T value) const noexcept { + return static_cast<size_t>(value); + } + }; + +public: + explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&, + Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&); + + /// Notify the cache that a new frame has been queued + void TickFrame(); + + /// Return a constant reference to the given image view id + [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; + + /// Return a reference to the given image view id + [[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept; + + /// Mark an image as modified from the GPU + void MarkModification(ImageId id) noexcept; + + /// Fill image_view_ids with the graphics images in indices + void FillGraphicsImageViews(std::span<const u32> indices, + std::span<ImageViewId> image_view_ids); + + /// Fill image_view_ids with the compute images in indices + void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids); + + /// Get the sampler from the graphics descriptor table in the specified index + Sampler* GetGraphicsSampler(u32 index); + + /// Get the sampler from the compute descriptor table in the specified index + Sampler* GetComputeSampler(u32 index); + + /// Refresh the state for graphics image view and sampler descriptors + void SynchronizeGraphicsDescriptors(); + + /// Refresh the state for compute image view and sampler descriptors + void SynchronizeComputeDescriptors(); + + /// Update bound render targets and upload memory if necessary + /// @param is_clear True when the render targets are being used for clears + void UpdateRenderTargets(bool is_clear); + + /// Find a framebuffer with the currently bound render targets + /// UpdateRenderTargets should be called before this + Framebuffer* GetFramebuffer(); + + /// Mark images in a range as modified from the CPU + void WriteMemory(VAddr cpu_addr, size_t size); + + /// Download contents of host images to guest memory in a region + void DownloadMemory(VAddr cpu_addr, size_t size); + + /// Remove images in a region + void UnmapMemory(VAddr cpu_addr, size_t size); + + /// Remove images in a region + void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); + + /// Blit an image with the given parameters + void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src, + const Tegra::Engines::Fermi2D::Config& copy); + + /// Try to find a cached image view in the given CPU address + [[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr); + + /// Return true when there are uncommitted images to be downloaded + [[nodiscard]] bool HasUncommittedFlushes() const noexcept; + + /// Return true when the caller should wait for async downloads + [[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept; + + /// Commit asynchronous downloads + void CommitAsyncFlushes(); + + /// Pop asynchronous downloads + void PopAsyncFlushes(); + + /// Return true when a CPU region is modified from the GPU + [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); + + std::mutex mutex; + +private: + /// Iterate over all page indices in a range + template <typename Func> + static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { + static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; + const u64 page_end = (addr + size - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { + if constexpr (RETURNS_BOOL) { + if (func(page)) { + break; + } + } else { + func(page); + } + } + } + + template <typename Func> + static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) { + static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>; + const u64 page_end = (addr + size - 1) >> PAGE_BITS; + for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { + if constexpr (RETURNS_BOOL) { + if (func(page)) { + break; + } + } else { + func(page); + } + } + } + + /// Runs the Garbage Collector. + void RunGarbageCollector(); + + /// Fills image_view_ids in the image views in indices + void FillImageViews(DescriptorTable<TICEntry>& table, + std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices, + std::span<ImageViewId> image_view_ids); + + /// Find or create an image view in the guest descriptor table + ImageViewId VisitImageView(DescriptorTable<TICEntry>& table, + std::span<ImageViewId> cached_image_view_ids, u32 index); + + /// Find or create a framebuffer with the given render target parameters + FramebufferId GetFramebufferId(const RenderTargets& key); + + /// Refresh the contents (pixel data) of an image + void RefreshContents(Image& image, ImageId image_id); + + /// Upload data from guest to an image + template <typename StagingBuffer> + void UploadImageContents(Image& image, StagingBuffer& staging_buffer); + + /// Find or create an image view from a guest descriptor + [[nodiscard]] ImageViewId FindImageView(const TICEntry& config); + + /// Create a new image view from a guest descriptor + [[nodiscard]] ImageViewId CreateImageView(const TICEntry& config); + + /// Find or create an image from the given parameters + [[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options = RelaxedOptions{}); + + /// Find an image from the given parameters + [[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options); + + /// Create an image from the given parameters + [[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, + RelaxedOptions options); + + /// Create a new image and join perfectly matching existing images + /// Remove joined images from the cache + [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); + + /// Return a blit image pair from the given guest blit parameters + [[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst, + const Tegra::Engines::Fermi2D::Surface& src); + + /// Find or create a sampler from a guest descriptor sampler + [[nodiscard]] SamplerId FindSampler(const TSCEntry& config); + + /// Find or create an image view for the given color buffer index + [[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear); + + /// Find or create an image view for the depth buffer + [[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear); + + /// Find or create a view for a render target with the given image parameters + [[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, + bool is_clear); + + /// Iterates over all the images in a region calling func + template <typename Func> + void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); + + template <typename Func> + void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func); + + template <typename Func> + void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func); + + /// Iterates over all the images in a region calling func + template <typename Func> + void ForEachSparseSegment(ImageBase& image, Func&& func); + + /// Find or create an image view in the given image with the passed parameters + [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); + + /// Register image in the page table + void RegisterImage(ImageId image); + + /// Unregister image from the page table + void UnregisterImage(ImageId image); + + /// Track CPU reads and writes for image + void TrackImage(ImageBase& image, ImageId image_id); + + /// Stop tracking CPU reads and writes for image + void UntrackImage(ImageBase& image, ImageId image_id); + + /// Delete image from the cache + void DeleteImage(ImageId image); + + /// Remove image views references from the cache + void RemoveImageViewReferences(std::span<const ImageViewId> removed_views); + + /// Remove framebuffers using the given image views from the cache + void RemoveFramebuffers(std::span<const ImageViewId> removed_views); + + /// Mark an image as modified from the GPU + void MarkModification(ImageBase& image) noexcept; + + /// Synchronize image aliases, copying data if needed + void SynchronizeAliases(ImageId image_id); + + /// Prepare an image to be used + void PrepareImage(ImageId image_id, bool is_modification, bool invalidate); + + /// Prepare an image view to be used + void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate); + + /// Execute copies from one image to the other, even if they are incompatible + void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies); + + /// Bind an image view as render target, downloading resources preemtively if needed + void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id); + + /// Create a render target from a given image and image view parameters + [[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage( + ImageId, const ImageViewInfo& view_info); + + /// Returns true if the current clear parameters clear the whole image of a given image view + [[nodiscard]] bool IsFullClear(ImageViewId id); + + Runtime& runtime; + VideoCore::RasterizerInterface& rasterizer; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::Engines::KeplerCompute& kepler_compute; + Tegra::MemoryManager& gpu_memory; + + DescriptorTable<TICEntry> graphics_image_table{gpu_memory}; + DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory}; + std::vector<SamplerId> graphics_sampler_ids; + std::vector<ImageViewId> graphics_image_view_ids; + + DescriptorTable<TICEntry> compute_image_table{gpu_memory}; + DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory}; + std::vector<SamplerId> compute_sampler_ids; + std::vector<ImageViewId> compute_image_view_ids; + + RenderTargets render_targets; + + std::unordered_map<TICEntry, ImageViewId> image_views; + std::unordered_map<TSCEntry, SamplerId> samplers; + std::unordered_map<RenderTargets, FramebufferId> framebuffers; + + std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table; + std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table; + std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table; + + std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views; + + VAddr virtual_invalid_space{}; + + bool has_deleted_images = false; + u64 total_used_memory = 0; + u64 minimum_memory; + u64 expected_memory; + u64 critical_memory; + + SlotVector<Image> slot_images; + SlotVector<ImageMapView> slot_map_views; + SlotVector<ImageView> slot_image_views; + SlotVector<ImageAlloc> slot_image_allocs; + SlotVector<Sampler> slot_samplers; + SlotVector<Framebuffer> slot_framebuffers; + + // TODO: This data structure is not optimal and it should be reworked + std::vector<ImageId> uncommitted_downloads; + std::queue<std::vector<ImageId>> committed_downloads; + + static constexpr size_t TICKS_TO_DESTROY = 6; + DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images; + DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view; + DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers; + + std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table; + + u64 modification_tick = 0; + u64 frame_tick = 0; + typename SlotVector<Image>::Iterator deletion_iterator; +}; + +} // namespace VideoCommon diff --git a/src/video_core/texture_cache/texture_cache_templates.h b/src/video_core/texture_cache/texture_cache_templates.h deleted file mode 100644 index 8440d23d1..000000000 --- a/src/video_core/texture_cache/texture_cache_templates.h +++ /dev/null @@ -1,1507 +0,0 @@ -// Copyright 2021 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include "video_core/texture_cache/texture_cache.h" - -namespace VideoCommon { - -using Tegra::Texture::SwizzleSource; -using Tegra::Texture::TextureType; -using Tegra::Texture::TICEntry; -using Tegra::Texture::TSCEntry; -using VideoCore::Surface::GetFormatType; -using VideoCore::Surface::IsCopyCompatible; -using VideoCore::Surface::PixelFormat; -using VideoCore::Surface::PixelFormatFromDepthFormat; -using VideoCore::Surface::PixelFormatFromRenderTargetFormat; -using VideoCore::Surface::SurfaceType; -using namespace Common::Literals; - -template <class P> -TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, - Tegra::Engines::Maxwell3D& maxwell3d_, - Tegra::Engines::KeplerCompute& kepler_compute_, - Tegra::MemoryManager& gpu_memory_) - : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, - kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} { - // Configure null sampler - TSCEntry sampler_descriptor{}; - sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear); - sampler_descriptor.mag_filter.Assign(Tegra::Texture::TextureFilter::Linear); - sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear); - sampler_descriptor.cubemap_anisotropy.Assign(1); - - // Make sure the first index is reserved for the null resources - // This way the null resource becomes a compile time constant - void(slot_image_views.insert(runtime, NullImageParams{})); - void(slot_samplers.insert(runtime, sampler_descriptor)); - - deletion_iterator = slot_images.begin(); - - if constexpr (HAS_DEVICE_MEMORY_INFO) { - const auto device_memory = runtime.GetDeviceLocalMemory(); - const u64 possible_expected_memory = (device_memory * 3) / 10; - const u64 possible_critical_memory = (device_memory * 6) / 10; - expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); - critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); - minimum_memory = 0; - } else { - // on OGL we can be more conservatives as the driver takes care. - expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB; - critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB; - minimum_memory = expected_memory; - } -} - -template <class P> -void TextureCache<P>::RunGarbageCollector() { - const bool high_priority_mode = total_used_memory >= expected_memory; - const bool aggressive_mode = total_used_memory >= critical_memory; - const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; - int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); - for (; num_iterations > 0; --num_iterations) { - if (deletion_iterator == slot_images.end()) { - deletion_iterator = slot_images.begin(); - if (deletion_iterator == slot_images.end()) { - break; - } - } - auto [image_id, image_tmp] = *deletion_iterator; - Image* image = image_tmp; // fix clang error. - const bool is_alias = True(image->flags & ImageFlagBits::Alias); - const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); - const bool must_download = image->IsSafeDownload(); - bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); - const u64 ticks_needed = - is_bad_overlap - ? ticks_to_destroy >> 4 - : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy); - should_care |= aggressive_mode; - if (should_care && image->frame_tick + ticks_needed < frame_tick) { - if (is_bad_overlap) { - const bool overlap_check = std::ranges::all_of( - image->overlapping_images, [&, image](const ImageId& overlap_id) { - auto& overlap = slot_images[overlap_id]; - return overlap.frame_tick >= image->frame_tick; - }); - if (!overlap_check) { - ++deletion_iterator; - continue; - } - } - if (!is_bad_overlap && must_download) { - const bool alias_check = std::ranges::none_of( - image->aliased_images, [&, image](const AliasedImage& alias) { - auto& alias_image = slot_images[alias.id]; - return (alias_image.frame_tick < image->frame_tick) || - (alias_image.modification_tick < image->modification_tick); - }); - - if (alias_check) { - auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); - const auto copies = FullDownloadCopies(image->info); - image->DownloadMemory(map, copies); - runtime.Finish(); - SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); - } - } - if (True(image->flags & ImageFlagBits::Tracked)) { - UntrackImage(*image, image_id); - } - UnregisterImage(image_id); - DeleteImage(image_id); - if (is_bad_overlap) { - ++num_iterations; - } - } - ++deletion_iterator; - } -} - -template <class P> -void TextureCache<P>::TickFrame() { - if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { - RunGarbageCollector(); - } - sentenced_images.Tick(); - sentenced_framebuffers.Tick(); - sentenced_image_view.Tick(); - ++frame_tick; -} - -template <class P> -const typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) const noexcept { - return slot_image_views[id]; -} - -template <class P> -typename P::ImageView& TextureCache<P>::GetImageView(ImageViewId id) noexcept { - return slot_image_views[id]; -} - -template <class P> -void TextureCache<P>::MarkModification(ImageId id) noexcept { - MarkModification(slot_images[id]); -} - -template <class P> -void TextureCache<P>::FillGraphicsImageViews(std::span<const u32> indices, - std::span<ImageViewId> image_view_ids) { - FillImageViews(graphics_image_table, graphics_image_view_ids, indices, image_view_ids); -} - -template <class P> -void TextureCache<P>::FillComputeImageViews(std::span<const u32> indices, - std::span<ImageViewId> image_view_ids) { - FillImageViews(compute_image_table, compute_image_view_ids, indices, image_view_ids); -} - -template <class P> -typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) { - if (index > graphics_sampler_table.Limit()) { - LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); - return &slot_samplers[NULL_SAMPLER_ID]; - } - const auto [descriptor, is_new] = graphics_sampler_table.Read(index); - SamplerId& id = graphics_sampler_ids[index]; - if (is_new) { - id = FindSampler(descriptor); - } - return &slot_samplers[id]; -} - -template <class P> -typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) { - if (index > compute_sampler_table.Limit()) { - LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index); - return &slot_samplers[NULL_SAMPLER_ID]; - } - const auto [descriptor, is_new] = compute_sampler_table.Read(index); - SamplerId& id = compute_sampler_ids[index]; - if (is_new) { - id = FindSampler(descriptor); - } - return &slot_samplers[id]; -} - -template <class P> -void TextureCache<P>::SynchronizeGraphicsDescriptors() { - using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex; - const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex; - const u32 tic_limit = maxwell3d.regs.tic.limit; - const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit; - if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) { - graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); - } - if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) { - graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); - } -} - -template <class P> -void TextureCache<P>::SynchronizeComputeDescriptors() { - const bool linked_tsc = kepler_compute.launch_description.linked_tsc; - const u32 tic_limit = kepler_compute.regs.tic.limit; - const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit; - const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address(); - if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) { - compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID); - } - if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) { - compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID); - } -} - -template <class P> -void TextureCache<P>::UpdateRenderTargets(bool is_clear) { - using namespace VideoCommon::Dirty; - auto& flags = maxwell3d.dirty.flags; - if (!flags[Dirty::RenderTargets]) { - for (size_t index = 0; index < NUM_RT; ++index) { - ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; - PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); - } - const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; - PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); - return; - } - flags[Dirty::RenderTargets] = false; - - // Render target control is used on all render targets, so force look ups when this one is up - const bool force = flags[Dirty::RenderTargetControl]; - flags[Dirty::RenderTargetControl] = false; - - for (size_t index = 0; index < NUM_RT; ++index) { - ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index]; - if (flags[Dirty::ColorBuffer0 + index] || force) { - flags[Dirty::ColorBuffer0 + index] = false; - BindRenderTarget(&color_buffer_id, FindColorBuffer(index, is_clear)); - } - PrepareImageView(color_buffer_id, true, is_clear && IsFullClear(color_buffer_id)); - } - if (flags[Dirty::ZetaBuffer] || force) { - flags[Dirty::ZetaBuffer] = false; - BindRenderTarget(&render_targets.depth_buffer_id, FindDepthBuffer(is_clear)); - } - const ImageViewId depth_buffer_id = render_targets.depth_buffer_id; - PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id)); - - for (size_t index = 0; index < NUM_RT; ++index) { - render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index)); - } - render_targets.size = Extent2D{ - maxwell3d.regs.render_area.width, - maxwell3d.regs.render_area.height, - }; -} - -template <class P> -typename P::Framebuffer* TextureCache<P>::GetFramebuffer() { - return &slot_framebuffers[GetFramebufferId(render_targets)]; -} - -template <class P> -void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table, - std::span<ImageViewId> cached_image_view_ids, - std::span<const u32> indices, - std::span<ImageViewId> image_view_ids) { - ASSERT(indices.size() <= image_view_ids.size()); - do { - has_deleted_images = false; - std::ranges::transform(indices, image_view_ids.begin(), [&](u32 index) { - return VisitImageView(table, cached_image_view_ids, index); - }); - } while (has_deleted_images); -} - -template <class P> -ImageViewId TextureCache<P>::VisitImageView(DescriptorTable<TICEntry>& table, - std::span<ImageViewId> cached_image_view_ids, - u32 index) { - if (index > table.Limit()) { - LOG_DEBUG(HW_GPU, "Invalid image view index={}", index); - return NULL_IMAGE_VIEW_ID; - } - const auto [descriptor, is_new] = table.Read(index); - ImageViewId& image_view_id = cached_image_view_ids[index]; - if (is_new) { - image_view_id = FindImageView(descriptor); - } - if (image_view_id != NULL_IMAGE_VIEW_ID) { - PrepareImageView(image_view_id, false, false); - } - return image_view_id; -} - -template <class P> -FramebufferId TextureCache<P>::GetFramebufferId(const RenderTargets& key) { - const auto [pair, is_new] = framebuffers.try_emplace(key); - FramebufferId& framebuffer_id = pair->second; - if (!is_new) { - return framebuffer_id; - } - std::array<ImageView*, NUM_RT> color_buffers; - std::ranges::transform(key.color_buffer_ids, color_buffers.begin(), - [this](ImageViewId id) { return id ? &slot_image_views[id] : nullptr; }); - ImageView* const depth_buffer = - key.depth_buffer_id ? &slot_image_views[key.depth_buffer_id] : nullptr; - framebuffer_id = slot_framebuffers.insert(runtime, color_buffers, depth_buffer, key); - return framebuffer_id; -} - -template <class P> -void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) { - ForEachImageInRegion(cpu_addr, size, [this](ImageId image_id, Image& image) { - if (True(image.flags & ImageFlagBits::CpuModified)) { - return; - } - image.flags |= ImageFlagBits::CpuModified; - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, image_id); - } - }); -} - -template <class P> -void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) { - std::vector<ImageId> images; - ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { - if (!image.IsSafeDownload()) { - return; - } - image.flags &= ~ImageFlagBits::GpuModified; - images.push_back(image_id); - }); - if (images.empty()) { - return; - } - std::ranges::sort(images, [this](ImageId lhs, ImageId rhs) { - return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; - }); - for (const ImageId image_id : images) { - Image& image = slot_images[image_id]; - auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); - const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(map, copies); - runtime.Finish(); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); - } -} - -template <class P> -void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) { - std::vector<ImageId> deleted_images; - ForEachImageInRegion(cpu_addr, size, [&](ImageId id, Image&) { deleted_images.push_back(id); }); - for (const ImageId id : deleted_images) { - Image& image = slot_images[id]; - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, id); - } - UnregisterImage(id); - DeleteImage(id); - } -} - -template <class P> -void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { - std::vector<ImageId> deleted_images; - ForEachImageInRegionGPU(gpu_addr, size, - [&](ImageId id, Image&) { deleted_images.push_back(id); }); - for (const ImageId id : deleted_images) { - Image& image = slot_images[id]; - if (True(image.flags & ImageFlagBits::Remapped)) { - continue; - } - image.flags |= ImageFlagBits::Remapped; - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, id); - } - } -} - -template <class P> -void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, - const Tegra::Engines::Fermi2D::Surface& src, - const Tegra::Engines::Fermi2D::Config& copy) { - const BlitImages images = GetBlitImages(dst, src); - const ImageId dst_id = images.dst_id; - const ImageId src_id = images.src_id; - PrepareImage(src_id, false, false); - PrepareImage(dst_id, true, false); - - ImageBase& dst_image = slot_images[dst_id]; - const ImageBase& src_image = slot_images[src_id]; - - // TODO: Deduplicate - const std::optional src_base = src_image.TryFindBase(src.Address()); - const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}}; - const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range); - const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info); - const auto [src_samples_x, src_samples_y] = SamplesLog2(src_image.info.num_samples); - const Region2D src_region{ - Offset2D{.x = copy.src_x0 >> src_samples_x, .y = copy.src_y0 >> src_samples_y}, - Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y}, - }; - - const std::optional dst_base = dst_image.TryFindBase(dst.Address()); - const SubresourceRange dst_range{.base = dst_base.value(), .extent = {1, 1}}; - const ImageViewInfo dst_view_info(ImageViewType::e2D, images.dst_format, dst_range); - const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); - const auto [dst_samples_x, dst_samples_y] = SamplesLog2(dst_image.info.num_samples); - const Region2D dst_region{ - Offset2D{.x = copy.dst_x0 >> dst_samples_x, .y = copy.dst_y0 >> dst_samples_y}, - Offset2D{.x = copy.dst_x1 >> dst_samples_x, .y = copy.dst_y1 >> dst_samples_y}, - }; - - // Always call this after src_framebuffer_id was queried, as the address might be invalidated. - Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; - if constexpr (FRAMEBUFFER_BLITS) { - // OpenGL blits from framebuffers, not images - Framebuffer* const src_framebuffer = &slot_framebuffers[src_framebuffer_id]; - runtime.BlitFramebuffer(dst_framebuffer, src_framebuffer, dst_region, src_region, - copy.filter, copy.operation); - } else { - // Vulkan can blit images, but it lacks format reinterpretations - // Provide a framebuffer in case it's necessary - ImageView& dst_view = slot_image_views[dst_view_id]; - ImageView& src_view = slot_image_views[src_view_id]; - runtime.BlitImage(dst_framebuffer, dst_view, src_view, dst_region, src_region, copy.filter, - copy.operation); - } -} - -template <class P> -typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) { - // TODO: Properly implement this - const auto it = page_table.find(cpu_addr >> PAGE_BITS); - if (it == page_table.end()) { - return nullptr; - } - const auto& image_map_ids = it->second; - for (const ImageMapId map_id : image_map_ids) { - const ImageMapView& map = slot_map_views[map_id]; - const ImageBase& image = slot_images[map.image_id]; - if (image.cpu_addr != cpu_addr) { - continue; - } - if (image.image_view_ids.empty()) { - continue; - } - return &slot_image_views[image.image_view_ids.at(0)]; - } - return nullptr; -} - -template <class P> -bool TextureCache<P>::HasUncommittedFlushes() const noexcept { - return !uncommitted_downloads.empty(); -} - -template <class P> -bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept { - return !committed_downloads.empty() && !committed_downloads.front().empty(); -} - -template <class P> -void TextureCache<P>::CommitAsyncFlushes() { - // This is intentionally passing the value by copy - committed_downloads.push(uncommitted_downloads); - uncommitted_downloads.clear(); -} - -template <class P> -void TextureCache<P>::PopAsyncFlushes() { - if (committed_downloads.empty()) { - return; - } - const std::span<const ImageId> download_ids = committed_downloads.front(); - if (download_ids.empty()) { - committed_downloads.pop(); - return; - } - size_t total_size_bytes = 0; - for (const ImageId image_id : download_ids) { - total_size_bytes += slot_images[image_id].unswizzled_size_bytes; - } - auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); - const size_t original_offset = download_map.offset; - for (const ImageId image_id : download_ids) { - Image& image = slot_images[image_id]; - const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(download_map, copies); - download_map.offset += image.unswizzled_size_bytes; - } - // Wait for downloads to finish - runtime.Finish(); - - download_map.offset = original_offset; - std::span<u8> download_span = download_map.mapped_span; - for (const ImageId image_id : download_ids) { - const ImageBase& image = slot_images[image_id]; - const auto copies = FullDownloadCopies(image.info); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span); - download_map.offset += image.unswizzled_size_bytes; - download_span = download_span.subspan(image.unswizzled_size_bytes); - } - committed_downloads.pop(); -} - -template <class P> -bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) { - bool is_modified = false; - ForEachImageInRegion(addr, size, [&is_modified](ImageId, ImageBase& image) { - if (False(image.flags & ImageFlagBits::GpuModified)) { - return false; - } - is_modified = true; - return true; - }); - return is_modified; -} - -template <class P> -void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { - if (False(image.flags & ImageFlagBits::CpuModified)) { - // Only upload modified images - return; - } - image.flags &= ~ImageFlagBits::CpuModified; - TrackImage(image, image_id); - - if (image.info.num_samples > 1) { - LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); - return; - } - auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image)); - UploadImageContents(image, staging); - runtime.InsertUploadMemoryBarrier(); -} - -template <class P> -template <typename StagingBuffer> -void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging) { - const std::span<u8> mapped_span = staging.mapped_span; - const GPUVAddr gpu_addr = image.gpu_addr; - - if (True(image.flags & ImageFlagBits::AcceleratedUpload)) { - gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes()); - const auto uploads = FullUploadSwizzles(image.info); - runtime.AccelerateImageUpload(image, staging, uploads); - } else if (True(image.flags & ImageFlagBits::Converted)) { - std::vector<u8> unswizzled_data(image.unswizzled_size_bytes); - auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data); - ConvertImage(unswizzled_data, image.info, mapped_span, copies); - image.UploadMemory(staging, copies); - } else { - const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span); - image.UploadMemory(staging, copies); - } -} - -template <class P> -ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) { - if (!IsValidEntry(gpu_memory, config)) { - return NULL_IMAGE_VIEW_ID; - } - const auto [pair, is_new] = image_views.try_emplace(config); - ImageViewId& image_view_id = pair->second; - if (is_new) { - image_view_id = CreateImageView(config); - } - return image_view_id; -} - -template <class P> -ImageViewId TextureCache<P>::CreateImageView(const TICEntry& config) { - const ImageInfo info(config); - if (info.type == ImageType::Buffer) { - const ImageViewInfo view_info(config, 0); - return slot_image_views.insert(runtime, info, view_info, config.Address()); - } - const u32 layer_offset = config.BaseLayer() * info.layer_stride; - const GPUVAddr image_gpu_addr = config.Address() - layer_offset; - const ImageId image_id = FindOrInsertImage(info, image_gpu_addr); - if (!image_id) { - return NULL_IMAGE_VIEW_ID; - } - ImageBase& image = slot_images[image_id]; - const SubresourceBase base = image.TryFindBase(config.Address()).value(); - ASSERT(base.level == 0); - const ImageViewInfo view_info(config, base.layer); - const ImageViewId image_view_id = FindOrEmplaceImageView(image_id, view_info); - ImageViewBase& image_view = slot_image_views[image_view_id]; - image_view.flags |= ImageViewFlagBits::Strong; - image.flags |= ImageFlagBits::Strong; - return image_view_id; -} - -template <class P> -ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options) { - if (const ImageId image_id = FindImage(info, gpu_addr, options); image_id) { - return image_id; - } - return InsertImage(info, gpu_addr, options); -} - -template <class P> -ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options) { - std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); - if (!cpu_addr) { - return ImageId{}; - } - } - const bool broken_views = runtime.HasBrokenTextureViewFormats(); - const bool native_bgr = runtime.HasNativeBgr(); - ImageId image_id; - const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { - if (True(existing_image.flags & ImageFlagBits::Remapped)) { - return false; - } - if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { - const bool strict_size = False(options & RelaxedOptions::Size) && - True(existing_image.flags & ImageFlagBits::Strong); - const ImageInfo& existing = existing_image.info; - if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && - existing.pitch == info.pitch && - IsPitchLinearSameSize(existing, info, strict_size) && - IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) { - image_id = existing_image_id; - return true; - } - } else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views, - native_bgr)) { - image_id = existing_image_id; - return true; - } - return false; - }; - ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); - return image_id; -} - -template <class P> -ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, - RelaxedOptions options) { - std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - const auto size = CalculateGuestSizeInBytes(info); - cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); - if (!cpu_addr) { - const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; - virtual_invalid_space += Common::AlignUp(size, 32); - cpu_addr = std::optional<VAddr>(fake_addr); - } - } - ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); - const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); - const Image& image = slot_images[image_id]; - // Using "image.gpu_addr" instead of "gpu_addr" is important because it might be different - const auto [it, is_new] = image_allocs_table.try_emplace(image.gpu_addr); - if (is_new) { - it->second = slot_image_allocs.insert(); - } - slot_image_allocs[it->second].images.push_back(image_id); - return image_id; -} - -template <class P> -ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr) { - ImageInfo new_info = info; - const size_t size_bytes = CalculateGuestSizeInBytes(new_info); - const bool broken_views = runtime.HasBrokenTextureViewFormats(); - const bool native_bgr = runtime.HasNativeBgr(); - std::vector<ImageId> overlap_ids; - std::unordered_set<ImageId> overlaps_found; - std::vector<ImageId> left_aliased_ids; - std::vector<ImageId> right_aliased_ids; - std::unordered_set<ImageId> ignore_textures; - std::vector<ImageId> bad_overlap_ids; - const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { - if (True(overlap.flags & ImageFlagBits::Remapped)) { - ignore_textures.insert(overlap_id); - return; - } - if (info.type == ImageType::Linear) { - if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { - // Alias linear images with the same pitch - left_aliased_ids.push_back(overlap_id); - } - return; - } - overlaps_found.insert(overlap_id); - static constexpr bool strict_size = true; - const std::optional<OverlapResult> solution = ResolveOverlap( - new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); - if (solution) { - gpu_addr = solution->gpu_addr; - cpu_addr = solution->cpu_addr; - new_info.resources = solution->resources; - overlap_ids.push_back(overlap_id); - return; - } - static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format; - const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); - if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { - left_aliased_ids.push_back(overlap_id); - overlap.flags |= ImageFlagBits::Alias; - } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, - broken_views, native_bgr)) { - right_aliased_ids.push_back(overlap_id); - overlap.flags |= ImageFlagBits::Alias; - } else { - bad_overlap_ids.push_back(overlap_id); - overlap.flags |= ImageFlagBits::BadOverlap; - } - }; - ForEachImageInRegion(cpu_addr, size_bytes, region_check); - const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) { - if (!overlaps_found.contains(overlap_id)) { - if (True(overlap.flags & ImageFlagBits::Remapped)) { - ignore_textures.insert(overlap_id); - } - if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) { - ignore_textures.insert(overlap_id); - } - } - }; - ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); - const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); - Image& new_image = slot_images[new_image_id]; - - if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { - new_image.flags |= ImageFlagBits::Sparse; - } - - for (const ImageId overlap_id : ignore_textures) { - Image& overlap = slot_images[overlap_id]; - if (True(overlap.flags & ImageFlagBits::GpuModified)) { - UNIMPLEMENTED(); - } - if (True(overlap.flags & ImageFlagBits::Tracked)) { - UntrackImage(overlap, overlap_id); - } - UnregisterImage(overlap_id); - DeleteImage(overlap_id); - } - - // TODO: Only upload what we need - RefreshContents(new_image, new_image_id); - - for (const ImageId overlap_id : overlap_ids) { - Image& overlap = slot_images[overlap_id]; - if (overlap.info.num_samples != new_image.info.num_samples) { - LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); - } else { - const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); - const auto copies = MakeShrinkImageCopies(new_info, overlap.info, base); - runtime.CopyImage(new_image, overlap, copies); - } - if (True(overlap.flags & ImageFlagBits::Tracked)) { - UntrackImage(overlap, overlap_id); - } - UnregisterImage(overlap_id); - DeleteImage(overlap_id); - } - ImageBase& new_image_base = new_image; - for (const ImageId aliased_id : right_aliased_ids) { - ImageBase& aliased = slot_images[aliased_id]; - AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); - new_image.flags |= ImageFlagBits::Alias; - } - for (const ImageId aliased_id : left_aliased_ids) { - ImageBase& aliased = slot_images[aliased_id]; - AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); - new_image.flags |= ImageFlagBits::Alias; - } - for (const ImageId aliased_id : bad_overlap_ids) { - ImageBase& aliased = slot_images[aliased_id]; - aliased.overlapping_images.push_back(new_image_id); - new_image.overlapping_images.push_back(aliased_id); - new_image.flags |= ImageFlagBits::BadOverlap; - } - RegisterImage(new_image_id); - return new_image_id; -} - -template <class P> -typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages( - const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) { - static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples; - const GPUVAddr dst_addr = dst.Address(); - const GPUVAddr src_addr = src.Address(); - ImageInfo dst_info(dst); - ImageInfo src_info(src); - ImageId dst_id; - ImageId src_id; - do { - has_deleted_images = false; - dst_id = FindImage(dst_info, dst_addr, FIND_OPTIONS); - src_id = FindImage(src_info, src_addr, FIND_OPTIONS); - const ImageBase* const dst_image = dst_id ? &slot_images[dst_id] : nullptr; - const ImageBase* const src_image = src_id ? &slot_images[src_id] : nullptr; - DeduceBlitImages(dst_info, src_info, dst_image, src_image); - if (GetFormatType(dst_info.format) != GetFormatType(src_info.format)) { - continue; - } - if (!dst_id) { - dst_id = InsertImage(dst_info, dst_addr, RelaxedOptions{}); - } - if (!src_id) { - src_id = InsertImage(src_info, src_addr, RelaxedOptions{}); - } - } while (has_deleted_images); - return BlitImages{ - .dst_id = dst_id, - .src_id = src_id, - .dst_format = dst_info.format, - .src_format = src_info.format, - }; -} - -template <class P> -SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { - if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { - return NULL_SAMPLER_ID; - } - const auto [pair, is_new] = samplers.try_emplace(config); - if (is_new) { - pair->second = slot_samplers.insert(runtime, config); - } - return pair->second; -} - -template <class P> -ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) { - const auto& regs = maxwell3d.regs; - if (index >= regs.rt_control.count) { - return ImageViewId{}; - } - const auto& rt = regs.rt[index]; - const GPUVAddr gpu_addr = rt.Address(); - if (gpu_addr == 0) { - return ImageViewId{}; - } - if (rt.format == Tegra::RenderTargetFormat::NONE) { - return ImageViewId{}; - } - const ImageInfo info(regs, index); - return FindRenderTargetView(info, gpu_addr, is_clear); -} - -template <class P> -ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) { - const auto& regs = maxwell3d.regs; - if (!regs.zeta_enable) { - return ImageViewId{}; - } - const GPUVAddr gpu_addr = regs.zeta.Address(); - if (gpu_addr == 0) { - return ImageViewId{}; - } - const ImageInfo info(regs); - return FindRenderTargetView(info, gpu_addr, is_clear); -} - -template <class P> -ImageViewId TextureCache<P>::FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr, - bool is_clear) { - const auto options = is_clear ? RelaxedOptions::Samples : RelaxedOptions{}; - const ImageId image_id = FindOrInsertImage(info, gpu_addr, options); - if (!image_id) { - return NULL_IMAGE_VIEW_ID; - } - Image& image = slot_images[image_id]; - const ImageViewType view_type = RenderTargetImageViewType(info); - SubresourceBase base; - if (image.info.type == ImageType::Linear) { - base = SubresourceBase{.level = 0, .layer = 0}; - } else { - base = image.TryFindBase(gpu_addr).value(); - } - const s32 layers = image.info.type == ImageType::e3D ? info.size.depth : info.resources.layers; - const SubresourceRange range{ - .base = base, - .extent = {.levels = 1, .layers = layers}, - }; - return FindOrEmplaceImageView(image_id, ImageViewInfo(view_type, info.format, range)); -} - -template <class P> -template <typename Func> -void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func) { - using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; - static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; - boost::container::small_vector<ImageId, 32> images; - boost::container::small_vector<ImageMapId, 32> maps; - ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) { - const auto it = page_table.find(page); - if (it == page_table.end()) { - if constexpr (BOOL_BREAK) { - return false; - } else { - return; - } - } - for (const ImageMapId map_id : it->second) { - ImageMapView& map = slot_map_views[map_id]; - if (map.picked) { - continue; - } - if (!map.Overlaps(cpu_addr, size)) { - continue; - } - map.picked = true; - maps.push_back(map_id); - Image& image = slot_images[map.image_id]; - if (True(image.flags & ImageFlagBits::Picked)) { - continue; - } - image.flags |= ImageFlagBits::Picked; - images.push_back(map.image_id); - if constexpr (BOOL_BREAK) { - if (func(map.image_id, image)) { - return true; - } - } else { - func(map.image_id, image); - } - } - if constexpr (BOOL_BREAK) { - return false; - } - }); - for (const ImageId image_id : images) { - slot_images[image_id].flags &= ~ImageFlagBits::Picked; - } - for (const ImageMapId map_id : maps) { - slot_map_views[map_id].picked = false; - } -} - -template <class P> -template <typename Func> -void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { - using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; - static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; - boost::container::small_vector<ImageId, 8> images; - ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { - const auto it = gpu_page_table.find(page); - if (it == gpu_page_table.end()) { - if constexpr (BOOL_BREAK) { - return false; - } else { - return; - } - } - for (const ImageId image_id : it->second) { - Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::Picked)) { - continue; - } - if (!image.OverlapsGPU(gpu_addr, size)) { - continue; - } - image.flags |= ImageFlagBits::Picked; - images.push_back(image_id); - if constexpr (BOOL_BREAK) { - if (func(image_id, image)) { - return true; - } - } else { - func(image_id, image); - } - } - if constexpr (BOOL_BREAK) { - return false; - } - }); - for (const ImageId image_id : images) { - slot_images[image_id].flags &= ~ImageFlagBits::Picked; - } -} - -template <class P> -template <typename Func> -void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) { - using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type; - static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>; - boost::container::small_vector<ImageId, 8> images; - ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { - const auto it = sparse_page_table.find(page); - if (it == sparse_page_table.end()) { - if constexpr (BOOL_BREAK) { - return false; - } else { - return; - } - } - for (const ImageId image_id : it->second) { - Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::Picked)) { - continue; - } - if (!image.OverlapsGPU(gpu_addr, size)) { - continue; - } - image.flags |= ImageFlagBits::Picked; - images.push_back(image_id); - if constexpr (BOOL_BREAK) { - if (func(image_id, image)) { - return true; - } - } else { - func(image_id, image); - } - } - if constexpr (BOOL_BREAK) { - return false; - } - }); - for (const ImageId image_id : images) { - slot_images[image_id].flags &= ~ImageFlagBits::Picked; - } -} - -template <class P> -template <typename Func> -void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) { - using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type; - static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>; - const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); - for (auto& segment : segments) { - const auto gpu_addr = segment.first; - const auto size = segment.second; - std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - if constexpr (RETURNS_BOOL) { - if (func(gpu_addr, *cpu_addr, size)) { - break; - } - } else { - func(gpu_addr, *cpu_addr, size); - } - } -} - -template <class P> -ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { - Image& image = slot_images[image_id]; - if (const ImageViewId image_view_id = image.FindView(info); image_view_id) { - return image_view_id; - } - const ImageViewId image_view_id = slot_image_views.insert(runtime, info, image_id, image); - image.InsertView(info, image_view_id); - return image_view_id; -} - -template <class P> -void TextureCache<P>::RegisterImage(ImageId image_id) { - ImageBase& image = slot_images[image_id]; - ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), - "Trying to register an already registered image"); - image.flags |= ImageFlagBits::Registered; - u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); - if ((IsPixelFormatASTC(image.info.format) && - True(image.flags & ImageFlagBits::AcceleratedUpload)) || - True(image.flags & ImageFlagBits::Converted)) { - tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); - } - total_used_memory += Common::AlignUp(tentative_size, 1024); - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); - if (False(image.flags & ImageFlagBits::Sparse)) { - auto map_id = - slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); - ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, - [this, map_id](u64 page) { page_table[page].push_back(map_id); }); - image.map_view_id = map_id; - return; - } - std::vector<ImageViewId> sparse_maps{}; - ForEachSparseSegment( - image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { - auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); - ForEachCPUPage(cpu_addr, size, - [this, map_id](u64 page) { page_table[page].push_back(map_id); }); - sparse_maps.push_back(map_id); - }); - sparse_views.emplace(image_id, std::move(sparse_maps)); - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); -} - -template <class P> -void TextureCache<P>::UnregisterImage(ImageId image_id) { - Image& image = slot_images[image_id]; - ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), - "Trying to unregister an already registered image"); - image.flags &= ~ImageFlagBits::Registered; - image.flags &= ~ImageFlagBits::BadOverlap; - u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); - if ((IsPixelFormatASTC(image.info.format) && - True(image.flags & ImageFlagBits::AcceleratedUpload)) || - True(image.flags & ImageFlagBits::Converted)) { - tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); - } - total_used_memory -= Common::AlignUp(tentative_size, 1024); - const auto& clear_page_table = - [this, image_id]( - u64 page, - std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) { - const auto page_it = selected_page_table.find(page); - if (page_it == selected_page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); - return; - } - std::vector<ImageId>& image_ids = page_it->second; - const auto vector_it = std::ranges::find(image_ids, image_id); - if (vector_it == image_ids.end()) { - UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", - page << PAGE_BITS); - return; - } - image_ids.erase(vector_it); - }; - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); - if (False(image.flags & ImageFlagBits::Sparse)) { - const auto map_id = image.map_view_id; - ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { - const auto page_it = page_table.find(page); - if (page_it == page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); - return; - } - std::vector<ImageMapId>& image_map_ids = page_it->second; - const auto vector_it = std::ranges::find(image_map_ids, map_id); - if (vector_it == image_map_ids.end()) { - UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", - page << PAGE_BITS); - return; - } - image_map_ids.erase(vector_it); - }); - slot_map_views.erase(map_id); - return; - } - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { - clear_page_table(page, sparse_page_table); - }); - auto it = sparse_views.find(image_id); - ASSERT(it != sparse_views.end()); - auto& sparse_maps = it->second; - for (auto& map_view_id : sparse_maps) { - const auto& map_range = slot_map_views[map_view_id]; - const VAddr cpu_addr = map_range.cpu_addr; - const std::size_t size = map_range.size; - ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { - const auto page_it = page_table.find(page); - if (page_it == page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); - return; - } - std::vector<ImageMapId>& image_map_ids = page_it->second; - auto vector_it = image_map_ids.begin(); - while (vector_it != image_map_ids.end()) { - ImageMapView& map = slot_map_views[*vector_it]; - if (map.image_id != image_id) { - vector_it++; - continue; - } - if (!map.picked) { - map.picked = true; - } - vector_it = image_map_ids.erase(vector_it); - } - }); - slot_map_views.erase(map_view_id); - } - sparse_views.erase(it); -} - -template <class P> -void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) { - ASSERT(False(image.flags & ImageFlagBits::Tracked)); - image.flags |= ImageFlagBits::Tracked; - if (False(image.flags & ImageFlagBits::Sparse)) { - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); - return; - } - if (True(image.flags & ImageFlagBits::Registered)) { - auto it = sparse_views.find(image_id); - ASSERT(it != sparse_views.end()); - auto& sparse_maps = it->second; - for (auto& map_view_id : sparse_maps) { - const auto& map = slot_map_views[map_view_id]; - const VAddr cpu_addr = map.cpu_addr; - const std::size_t size = map.size; - rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); - } - return; - } - ForEachSparseSegment(image, - [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { - rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); - }); -} - -template <class P> -void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) { - ASSERT(True(image.flags & ImageFlagBits::Tracked)); - image.flags &= ~ImageFlagBits::Tracked; - if (False(image.flags & ImageFlagBits::Sparse)) { - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); - return; - } - ASSERT(True(image.flags & ImageFlagBits::Registered)); - auto it = sparse_views.find(image_id); - ASSERT(it != sparse_views.end()); - auto& sparse_maps = it->second; - for (auto& map_view_id : sparse_maps) { - const auto& map = slot_map_views[map_view_id]; - const VAddr cpu_addr = map.cpu_addr; - const std::size_t size = map.size; - rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); - } -} - -template <class P> -void TextureCache<P>::DeleteImage(ImageId image_id) { - ImageBase& image = slot_images[image_id]; - const GPUVAddr gpu_addr = image.gpu_addr; - const auto alloc_it = image_allocs_table.find(gpu_addr); - if (alloc_it == image_allocs_table.end()) { - UNREACHABLE_MSG("Trying to delete an image alloc that does not exist in address 0x{:x}", - gpu_addr); - return; - } - const ImageAllocId alloc_id = alloc_it->second; - std::vector<ImageId>& alloc_images = slot_image_allocs[alloc_id].images; - const auto alloc_image_it = std::ranges::find(alloc_images, image_id); - if (alloc_image_it == alloc_images.end()) { - UNREACHABLE_MSG("Trying to delete an image that does not exist"); - return; - } - ASSERT_MSG(False(image.flags & ImageFlagBits::Tracked), "Image was not untracked"); - ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered"); - - // Mark render targets as dirty - auto& dirty = maxwell3d.dirty.flags; - dirty[Dirty::RenderTargets] = true; - dirty[Dirty::ZetaBuffer] = true; - for (size_t rt = 0; rt < NUM_RT; ++rt) { - dirty[Dirty::ColorBuffer0 + rt] = true; - } - const std::span<const ImageViewId> image_view_ids = image.image_view_ids; - for (const ImageViewId image_view_id : image_view_ids) { - std::ranges::replace(render_targets.color_buffer_ids, image_view_id, ImageViewId{}); - if (render_targets.depth_buffer_id == image_view_id) { - render_targets.depth_buffer_id = ImageViewId{}; - } - } - RemoveImageViewReferences(image_view_ids); - RemoveFramebuffers(image_view_ids); - - for (const AliasedImage& alias : image.aliased_images) { - ImageBase& other_image = slot_images[alias.id]; - [[maybe_unused]] const size_t num_removed_aliases = - std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { - return other_alias.id == image_id; - }); - other_image.CheckAliasState(); - ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", - num_removed_aliases); - } - for (const ImageId overlap_id : image.overlapping_images) { - ImageBase& other_image = slot_images[overlap_id]; - [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if( - other_image.overlapping_images, - [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; }); - other_image.CheckBadOverlapState(); - ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}", - num_removed_overlaps); - } - for (const ImageViewId image_view_id : image_view_ids) { - sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); - slot_image_views.erase(image_view_id); - } - sentenced_images.Push(std::move(slot_images[image_id])); - slot_images.erase(image_id); - - alloc_images.erase(alloc_image_it); - if (alloc_images.empty()) { - image_allocs_table.erase(alloc_it); - } - if constexpr (ENABLE_VALIDATION) { - std::ranges::fill(graphics_image_view_ids, CORRUPT_ID); - std::ranges::fill(compute_image_view_ids, CORRUPT_ID); - } - graphics_image_table.Invalidate(); - compute_image_table.Invalidate(); - has_deleted_images = true; -} - -template <class P> -void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) { - auto it = image_views.begin(); - while (it != image_views.end()) { - const auto found = std::ranges::find(removed_views, it->second); - if (found != removed_views.end()) { - it = image_views.erase(it); - } else { - ++it; - } - } -} - -template <class P> -void TextureCache<P>::RemoveFramebuffers(std::span<const ImageViewId> removed_views) { - auto it = framebuffers.begin(); - while (it != framebuffers.end()) { - if (it->first.Contains(removed_views)) { - it = framebuffers.erase(it); - } else { - ++it; - } - } -} - -template <class P> -void TextureCache<P>::MarkModification(ImageBase& image) noexcept { - image.flags |= ImageFlagBits::GpuModified; - image.modification_tick = ++modification_tick; -} - -template <class P> -void TextureCache<P>::SynchronizeAliases(ImageId image_id) { - boost::container::small_vector<const AliasedImage*, 1> aliased_images; - ImageBase& image = slot_images[image_id]; - u64 most_recent_tick = image.modification_tick; - for (const AliasedImage& aliased : image.aliased_images) { - ImageBase& aliased_image = slot_images[aliased.id]; - if (image.modification_tick < aliased_image.modification_tick) { - most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick); - aliased_images.push_back(&aliased); - } - } - if (aliased_images.empty()) { - return; - } - image.modification_tick = most_recent_tick; - std::ranges::sort(aliased_images, [this](const AliasedImage* lhs, const AliasedImage* rhs) { - const ImageBase& lhs_image = slot_images[lhs->id]; - const ImageBase& rhs_image = slot_images[rhs->id]; - return lhs_image.modification_tick < rhs_image.modification_tick; - }); - for (const AliasedImage* const aliased : aliased_images) { - CopyImage(image_id, aliased->id, aliased->copies); - } -} - -template <class P> -void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool invalidate) { - Image& image = slot_images[image_id]; - if (invalidate) { - image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); - if (False(image.flags & ImageFlagBits::Tracked)) { - TrackImage(image, image_id); - } - } else { - RefreshContents(image, image_id); - SynchronizeAliases(image_id); - } - if (is_modification) { - MarkModification(image); - } - image.frame_tick = frame_tick; -} - -template <class P> -void TextureCache<P>::PrepareImageView(ImageViewId image_view_id, bool is_modification, - bool invalidate) { - if (!image_view_id) { - return; - } - const ImageViewBase& image_view = slot_image_views[image_view_id]; - if (image_view.IsBuffer()) { - return; - } - PrepareImage(image_view.image_id, is_modification, invalidate); -} - -template <class P> -void TextureCache<P>::CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies) { - Image& dst = slot_images[dst_id]; - Image& src = slot_images[src_id]; - const auto dst_format_type = GetFormatType(dst.info.format); - const auto src_format_type = GetFormatType(src.info.format); - if (src_format_type == dst_format_type) { - if constexpr (HAS_EMULATED_COPIES) { - if (!runtime.CanImageBeCopied(dst, src)) { - return runtime.EmulateCopyImage(dst, src, copies); - } - } - return runtime.CopyImage(dst, src, copies); - } - UNIMPLEMENTED_IF(dst.info.type != ImageType::e2D); - UNIMPLEMENTED_IF(src.info.type != ImageType::e2D); - for (const ImageCopy& copy : copies) { - UNIMPLEMENTED_IF(copy.dst_subresource.num_layers != 1); - UNIMPLEMENTED_IF(copy.src_subresource.num_layers != 1); - UNIMPLEMENTED_IF(copy.src_offset != Offset3D{}); - UNIMPLEMENTED_IF(copy.dst_offset != Offset3D{}); - - const SubresourceBase dst_base{ - .level = copy.dst_subresource.base_level, - .layer = copy.dst_subresource.base_layer, - }; - const SubresourceBase src_base{ - .level = copy.src_subresource.base_level, - .layer = copy.src_subresource.base_layer, - }; - const SubresourceExtent dst_extent{.levels = 1, .layers = 1}; - const SubresourceExtent src_extent{.levels = 1, .layers = 1}; - const SubresourceRange dst_range{.base = dst_base, .extent = dst_extent}; - const SubresourceRange src_range{.base = src_base, .extent = src_extent}; - const ImageViewInfo dst_view_info(ImageViewType::e2D, dst.info.format, dst_range); - const ImageViewInfo src_view_info(ImageViewType::e2D, src.info.format, src_range); - const auto [dst_framebuffer_id, dst_view_id] = RenderTargetFromImage(dst_id, dst_view_info); - Framebuffer* const dst_framebuffer = &slot_framebuffers[dst_framebuffer_id]; - const ImageViewId src_view_id = FindOrEmplaceImageView(src_id, src_view_info); - ImageView& dst_view = slot_image_views[dst_view_id]; - ImageView& src_view = slot_image_views[src_view_id]; - [[maybe_unused]] const Extent3D expected_size{ - .width = std::min(dst_view.size.width, src_view.size.width), - .height = std::min(dst_view.size.height, src_view.size.height), - .depth = std::min(dst_view.size.depth, src_view.size.depth), - }; - UNIMPLEMENTED_IF(copy.extent != expected_size); - - runtime.ConvertImage(dst_framebuffer, dst_view, src_view); - } -} - -template <class P> -void TextureCache<P>::BindRenderTarget(ImageViewId* old_id, ImageViewId new_id) { - if (*old_id == new_id) { - return; - } - if (*old_id) { - const ImageViewBase& old_view = slot_image_views[*old_id]; - if (True(old_view.flags & ImageViewFlagBits::PreemtiveDownload)) { - uncommitted_downloads.push_back(old_view.image_id); - } - } - *old_id = new_id; -} - -template <class P> -std::pair<FramebufferId, ImageViewId> TextureCache<P>::RenderTargetFromImage( - ImageId image_id, const ImageViewInfo& view_info) { - const ImageViewId view_id = FindOrEmplaceImageView(image_id, view_info); - const ImageBase& image = slot_images[image_id]; - const bool is_color = GetFormatType(image.info.format) == SurfaceType::ColorTexture; - const ImageViewId color_view_id = is_color ? view_id : ImageViewId{}; - const ImageViewId depth_view_id = is_color ? ImageViewId{} : view_id; - const Extent3D extent = MipSize(image.info.size, view_info.range.base.level); - const u32 num_samples = image.info.num_samples; - const auto [samples_x, samples_y] = SamplesLog2(num_samples); - const FramebufferId framebuffer_id = GetFramebufferId(RenderTargets{ - .color_buffer_ids = {color_view_id}, - .depth_buffer_id = depth_view_id, - .size = {extent.width >> samples_x, extent.height >> samples_y}, - }); - return {framebuffer_id, view_id}; -} - -template <class P> -bool TextureCache<P>::IsFullClear(ImageViewId id) { - if (!id) { - return true; - } - const ImageViewBase& image_view = slot_image_views[id]; - const ImageBase& image = slot_images[image_view.image_id]; - const Extent3D size = image_view.size; - const auto& regs = maxwell3d.regs; - const auto& scissor = regs.scissor_test[0]; - if (image.info.resources.levels > 1 || image.info.resources.layers > 1) { - // Images with multiple resources can't be cleared in a single call - return false; - } - if (regs.clear_flags.scissor == 0) { - // If scissor testing is disabled, the clear is always full - return true; - } - // Make sure the clear covers all texels in the subresource - return scissor.min_x == 0 && scissor.min_y == 0 && scissor.max_x >= size.width && - scissor.max_y >= size.height; -} - -} // namespace VideoCommon |