diff options
Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/buffer_cache/buffer_base.h | 20 | ||||
-rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 61 | ||||
-rw-r--r-- | src/video_core/command_classes/codecs/vp9.cpp | 1 | ||||
-rw-r--r-- | src/video_core/command_classes/codecs/vp9_types.h | 85 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_blit_screen.cpp | 4 | ||||
-rw-r--r-- | src/video_core/texture_cache/image_base.h | 2 | ||||
-rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 92 | ||||
-rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 8 | ||||
-rw-r--r-- | src/video_core/textures/decoders.cpp | 8 |
9 files changed, 113 insertions, 168 deletions
diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index c3318095c..be2113f5a 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -261,16 +261,6 @@ public: stream_score += score; } - /// Sets the new frame tick - void SetFrameTick(u64 new_frame_tick) noexcept { - frame_tick = new_frame_tick; - } - - /// Returns the new frame tick - [[nodiscard]] u64 FrameTick() const noexcept { - return frame_tick; - } - /// Returns the likeliness of this being a stream buffer [[nodiscard]] int StreamScore() const noexcept { return stream_score; @@ -307,6 +297,14 @@ public: return words.size_bytes; } + size_t getLRUID() const noexcept { + return lru_id; + } + + void setLRUID(size_t lru_id_) { + lru_id = lru_id_; + } + private: template <Type type> u64* Array() noexcept { @@ -603,9 +601,9 @@ private: RasterizerInterface* rasterizer = nullptr; VAddr cpu_addr = 0; Words words; - u64 frame_tick = 0; BufferFlagBits flags{}; int stream_score = 0; + size_t lru_id = SIZE_MAX; }; } // namespace VideoCommon diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 3b43554f9..7bfd57369 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -20,6 +20,7 @@ #include "common/common_types.h" #include "common/div_ceil.h" #include "common/literals.h" +#include "common/lru_cache.h" #include "common/microprofile.h" #include "common/scope_exit.h" #include "common/settings.h" @@ -330,7 +331,7 @@ private: template <bool insert> void ChangeRegister(BufferId buffer_id); - void TouchBuffer(Buffer& buffer) const noexcept; + void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept; bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); @@ -428,7 +429,11 @@ private: size_t immediate_buffer_capacity = 0; std::unique_ptr<u8[]> immediate_buffer_alloc; - typename SlotVector<Buffer>::Iterator deletion_iterator; + struct LRUItemParams { + using ObjectType = BufferId; + using TickType = u64; + }; + Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; u64 frame_tick = 0; u64 total_used_memory = 0; @@ -445,7 +450,6 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { // Ensure the first slot is used for the null buffer void(slot_buffers.insert(runtime, NullBufferParams{})); - deletion_iterator = slot_buffers.end(); common_ranges.clear(); } @@ -454,20 +458,17 @@ void BufferCache<P>::RunGarbageCollector() { const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY; const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; int num_iterations = aggressive_gc ? 64 : 32; - for (; num_iterations > 0; --num_iterations) { - if (deletion_iterator == slot_buffers.end()) { - deletion_iterator = slot_buffers.begin(); - } - ++deletion_iterator; - if (deletion_iterator == slot_buffers.end()) { - break; - } - const auto [buffer_id, buffer] = *deletion_iterator; - if (buffer->FrameTick() + ticks_to_destroy < frame_tick) { - DownloadBufferMemory(*buffer); - DeleteBuffer(buffer_id); + const auto clean_up = [this, &num_iterations](BufferId buffer_id) { + if (num_iterations == 0) { + return true; } - } + --num_iterations; + auto& buffer = slot_buffers[buffer_id]; + DownloadBufferMemory(buffer); + DeleteBuffer(buffer_id); + return false; + }; + lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); } template <class P> @@ -485,7 +486,7 @@ void BufferCache<P>::TickFrame() { const bool skip_preferred = hits * 256 < shots * 251; uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; - if (Settings::values.use_caches_gc.GetValue() && total_used_memory >= EXPECTED_MEMORY) { + if (total_used_memory >= EXPECTED_MEMORY) { RunGarbageCollector(); } ++frame_tick; @@ -954,7 +955,7 @@ bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) { template <class P> void BufferCache<P>::BindHostIndexBuffer() { Buffer& buffer = slot_buffers[index_buffer.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, index_buffer.buffer_id); const u32 offset = buffer.Offset(index_buffer.cpu_addr); const u32 size = index_buffer.size; SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); @@ -975,7 +976,7 @@ void BufferCache<P>::BindHostVertexBuffers() { for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { const Binding& binding = vertex_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); if (!flags[Dirty::VertexBuffer0 + index]) { continue; @@ -1011,7 +1012,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 const VAddr cpu_addr = binding.cpu_addr; const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && size <= uniform_buffer_skip_cache_size && !buffer.IsRegionGpuModified(cpu_addr, size); @@ -1083,7 +1084,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { const Binding& binding = storage_buffers[stage][index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1128,7 +1129,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() { for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { const Binding& binding = transform_feedback_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1148,7 +1149,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() { ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { const Binding& binding = compute_uniform_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1168,7 +1169,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { const Binding& binding = compute_storage_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; - TouchBuffer(buffer); + TouchBuffer(buffer, binding.buffer_id); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1513,11 +1514,11 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); const u32 size = static_cast<u32>(overlap.end - overlap.begin); const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); - TouchBuffer(slot_buffers[new_buffer_id]); for (const BufferId overlap_id : overlap.ids) { JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); } Register(new_buffer_id); + TouchBuffer(slot_buffers[new_buffer_id], new_buffer_id); return new_buffer_id; } @@ -1534,12 +1535,14 @@ void BufferCache<P>::Unregister(BufferId buffer_id) { template <class P> template <bool insert> void BufferCache<P>::ChangeRegister(BufferId buffer_id) { - const Buffer& buffer = slot_buffers[buffer_id]; + Buffer& buffer = slot_buffers[buffer_id]; const auto size = buffer.SizeBytes(); if (insert) { total_used_memory += Common::AlignUp(size, 1024); + buffer.setLRUID(lru_cache.Insert(buffer_id, frame_tick)); } else { total_used_memory -= Common::AlignUp(size, 1024); + lru_cache.Free(buffer.getLRUID()); } const VAddr cpu_addr_begin = buffer.CpuAddr(); const VAddr cpu_addr_end = cpu_addr_begin + size; @@ -1555,8 +1558,10 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) { } template <class P> -void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept { - buffer.SetFrameTick(frame_tick); +void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept { + if (buffer_id != NULL_BUFFER_ID) { + lru_cache.Touch(buffer.getLRUID(), frame_tick); + } } template <class P> diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 70030066a..d7e749485 100644 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -742,6 +742,7 @@ VpxBitStreamWriter VP9::ComposeUncompressedHeader() { uncomp_writer.WriteDeltaQ(current_frame_info.uv_dc_delta_q); uncomp_writer.WriteDeltaQ(current_frame_info.uv_ac_delta_q); + ASSERT(!current_frame_info.segment_enabled); uncomp_writer.WriteBit(false); // Segmentation enabled (TODO). const s32 min_tile_cols_log2 = CalcMinLog2TileCols(current_frame_info.frame_size.width); diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h index 87eafdb03..3b1ed4b3a 100644 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/command_classes/codecs/vp9_types.h @@ -22,7 +22,7 @@ struct Vp9FrameDimensions { }; static_assert(sizeof(Vp9FrameDimensions) == 0x8, "Vp9 Vp9FrameDimensions is an invalid size"); -enum FrameFlags : u32 { +enum class FrameFlags : u32 { IsKeyFrame = 1 << 0, LastFrameIsKeyFrame = 1 << 1, FrameSizeChanged = 1 << 2, @@ -30,6 +30,7 @@ enum FrameFlags : u32 { LastShowFrame = 1 << 4, IntraOnly = 1 << 5, }; +DECLARE_ENUM_FLAG_OPERATORS(FrameFlags) enum class TxSize { Tx4x4 = 0, // 4x4 transform @@ -92,44 +93,34 @@ struct Vp9EntropyProbs { static_assert(sizeof(Vp9EntropyProbs) == 0x7B4, "Vp9EntropyProbs is an invalid size"); struct Vp9PictureInfo { - bool is_key_frame; - bool intra_only; - bool last_frame_was_key; - bool frame_size_changed; - bool error_resilient_mode; - bool last_frame_shown; - bool show_frame; + u32 bitstream_size; + std::array<u64, 4> frame_offsets; std::array<s8, 4> ref_frame_sign_bias; s32 base_q_index; s32 y_dc_delta_q; s32 uv_dc_delta_q; s32 uv_ac_delta_q; - bool lossless; s32 transform_mode; - bool allow_high_precision_mv; s32 interp_filter; s32 reference_mode; - s8 comp_fixed_ref; - std::array<s8, 2> comp_var_ref; s32 log2_tile_cols; s32 log2_tile_rows; - bool segment_enabled; - bool segment_map_update; - bool segment_map_temporal_update; - s32 segment_abs_delta; - std::array<u32, 8> segment_feature_enable; - std::array<std::array<s16, 4>, 8> segment_feature_data; - bool mode_ref_delta_enabled; - bool use_prev_in_find_mv_refs; std::array<s8, 4> ref_deltas; std::array<s8, 2> mode_deltas; Vp9EntropyProbs entropy; Vp9FrameDimensions frame_size; u8 first_level; u8 sharpness_level; - u32 bitstream_size; - std::array<u64, 4> frame_offsets; - std::array<bool, 4> refresh_frame; + bool is_key_frame; + bool intra_only; + bool last_frame_was_key; + bool error_resilient_mode; + bool last_frame_shown; + bool show_frame; + bool lossless; + bool allow_high_precision_mv; + bool segment_enabled; + bool mode_ref_delta_enabled; }; struct Vp9FrameContainer { @@ -145,7 +136,7 @@ struct PictureInfo { Vp9FrameDimensions golden_frame_size; ///< 0x50 Vp9FrameDimensions alt_frame_size; ///< 0x58 Vp9FrameDimensions current_frame_size; ///< 0x60 - u32 vp9_flags; ///< 0x68 + FrameFlags vp9_flags; ///< 0x68 std::array<s8, 4> ref_frame_sign_bias; ///< 0x6C u8 first_level; ///< 0x70 u8 sharpness_level; ///< 0x71 @@ -158,60 +149,43 @@ struct PictureInfo { u8 allow_high_precision_mv; ///< 0x78 u8 interp_filter; ///< 0x79 u8 reference_mode; ///< 0x7A - s8 comp_fixed_ref; ///< 0x7B - std::array<s8, 2> comp_var_ref; ///< 0x7C + INSERT_PADDING_BYTES_NOINIT(3); ///< 0x7B u8 log2_tile_cols; ///< 0x7E u8 log2_tile_rows; ///< 0x7F Segmentation segmentation; ///< 0x80 LoopFilter loop_filter; ///< 0xE4 - INSERT_PADDING_BYTES_NOINIT(5); ///< 0xEB - u32 surface_params; ///< 0xF0 - INSERT_PADDING_WORDS_NOINIT(3); ///< 0xF4 + INSERT_PADDING_BYTES_NOINIT(21); ///< 0xEB [[nodiscard]] Vp9PictureInfo Convert() const { return { - .is_key_frame = (vp9_flags & FrameFlags::IsKeyFrame) != 0, - .intra_only = (vp9_flags & FrameFlags::IntraOnly) != 0, - .last_frame_was_key = (vp9_flags & FrameFlags::LastFrameIsKeyFrame) != 0, - .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, - .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0, - .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, - .show_frame = true, + .bitstream_size = bitstream_size, + .frame_offsets{}, .ref_frame_sign_bias = ref_frame_sign_bias, .base_q_index = base_q_index, .y_dc_delta_q = y_dc_delta_q, .uv_dc_delta_q = uv_dc_delta_q, .uv_ac_delta_q = uv_ac_delta_q, - .lossless = lossless != 0, .transform_mode = tx_mode, - .allow_high_precision_mv = allow_high_precision_mv != 0, .interp_filter = interp_filter, .reference_mode = reference_mode, - .comp_fixed_ref = comp_fixed_ref, - .comp_var_ref = comp_var_ref, .log2_tile_cols = log2_tile_cols, .log2_tile_rows = log2_tile_rows, - .segment_enabled = segmentation.enabled != 0, - .segment_map_update = segmentation.update_map != 0, - .segment_map_temporal_update = segmentation.temporal_update != 0, - .segment_abs_delta = segmentation.abs_delta, - .segment_feature_enable = segmentation.feature_mask, - .segment_feature_data = segmentation.feature_data, - .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0, - .use_prev_in_find_mv_refs = !(vp9_flags == (FrameFlags::ErrorResilientMode)) && - !(vp9_flags == (FrameFlags::FrameSizeChanged)) && - !(vp9_flags == (FrameFlags::IntraOnly)) && - (vp9_flags == (FrameFlags::LastShowFrame)) && - !(vp9_flags == (FrameFlags::LastFrameIsKeyFrame)), .ref_deltas = loop_filter.ref_deltas, .mode_deltas = loop_filter.mode_deltas, .entropy{}, .frame_size = current_frame_size, .first_level = first_level, .sharpness_level = sharpness_level, - .bitstream_size = bitstream_size, - .frame_offsets{}, - .refresh_frame{}, + .is_key_frame = True(vp9_flags & FrameFlags::IsKeyFrame), + .intra_only = True(vp9_flags & FrameFlags::IntraOnly), + .last_frame_was_key = True(vp9_flags & FrameFlags::LastFrameIsKeyFrame), + .error_resilient_mode = True(vp9_flags & FrameFlags::ErrorResilientMode), + .last_frame_shown = True(vp9_flags & FrameFlags::LastShowFrame), + .show_frame = true, + .lossless = lossless != 0, + .allow_high_precision_mv = allow_high_precision_mv != 0, + .segment_enabled = segmentation.enabled != 0, + .mode_ref_delta_enabled = loop_filter.mode_ref_delta_enabled != 0, }; } }; @@ -316,7 +290,6 @@ ASSERT_POSITION(last_frame_size, 0x48); ASSERT_POSITION(first_level, 0x70); ASSERT_POSITION(segmentation, 0x80); ASSERT_POSITION(loop_filter, 0xE4); -ASSERT_POSITION(surface_params, 0xF0); #undef ASSERT_POSITION #define ASSERT_POSITION(field_name, position) \ diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 5c43b8acf..cb0580182 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -159,11 +159,13 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr); - const size_t size_bytes = GetSizeInBytes(framebuffer); // TODO(Rodrigo): Read this from HLE constexpr u32 block_height_log2 = 4; const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer); + const u64 size_bytes{Tegra::Texture::CalculateSize(true, bytes_per_pixel, + framebuffer.stride, framebuffer.height, + 1, block_height_log2, 0)}; Tegra::Texture::UnswizzleTexture( mapped_span.subspan(image_offset, size_bytes), std::span(host_ptr, size_bytes), bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index ff1feda9b..0c17a791b 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -80,7 +80,7 @@ struct ImageBase { VAddr cpu_addr_end = 0; u64 modification_tick = 0; - u64 frame_tick = 0; + size_t lru_index = SIZE_MAX; std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{}; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a087498ff..24b809242 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -5,7 +5,6 @@ #pragma once #include "common/alignment.h" -#include "common/settings.h" #include "video_core/dirty_flags.h" #include "video_core/texture_cache/samples_helper.h" #include "video_core/texture_cache/texture_cache_base.h" @@ -43,8 +42,6 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& void(slot_image_views.insert(runtime, NullImageParams{})); void(slot_samplers.insert(runtime, sampler_descriptor)); - deletion_iterator = slot_images.begin(); - if constexpr (HAS_DEVICE_MEMORY_INFO) { const auto device_memory = runtime.GetDeviceLocalMemory(); const u64 possible_expected_memory = (device_memory * 3) / 10; @@ -64,70 +61,38 @@ template <class P> void TextureCache<P>::RunGarbageCollector() { const bool high_priority_mode = total_used_memory >= expected_memory; const bool aggressive_mode = total_used_memory >= critical_memory; - const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; - int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); - for (; num_iterations > 0; --num_iterations) { - if (deletion_iterator == slot_images.end()) { - deletion_iterator = slot_images.begin(); - if (deletion_iterator == slot_images.end()) { - break; - } + const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL; + size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 100 : 5); + const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) { + if (num_iterations == 0) { + return true; } - auto [image_id, image_tmp] = *deletion_iterator; - Image* image = image_tmp; // fix clang error. - const bool is_alias = True(image->flags & ImageFlagBits::Alias); - const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); - const bool must_download = image->IsSafeDownload(); - bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); - const u64 ticks_needed = - is_bad_overlap - ? ticks_to_destroy >> 4 - : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy); - should_care |= aggressive_mode; - if (should_care && image->frame_tick + ticks_needed < frame_tick) { - if (is_bad_overlap) { - const bool overlap_check = std::ranges::all_of( - image->overlapping_images, [&, image](const ImageId& overlap_id) { - auto& overlap = slot_images[overlap_id]; - return overlap.frame_tick >= image->frame_tick; - }); - if (!overlap_check) { - ++deletion_iterator; - continue; - } - } - if (!is_bad_overlap && must_download) { - const bool alias_check = std::ranges::none_of( - image->aliased_images, [&, image](const AliasedImage& alias) { - auto& alias_image = slot_images[alias.id]; - return (alias_image.frame_tick < image->frame_tick) || - (alias_image.modification_tick < image->modification_tick); - }); - - if (alias_check) { - auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); - const auto copies = FullDownloadCopies(image->info); - image->DownloadMemory(map, copies); - runtime.Finish(); - SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); - } - } - if (True(image->flags & ImageFlagBits::Tracked)) { - UntrackImage(*image, image_id); - } - UnregisterImage(image_id); - DeleteImage(image_id); - if (is_bad_overlap) { - ++num_iterations; - } + --num_iterations; + auto& image = slot_images[image_id]; + const bool must_download = image.IsSafeDownload(); + if (!high_priority_mode && must_download) { + return false; } - ++deletion_iterator; - } + if (must_download) { + auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(map, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); + } + if (True(image.flags & ImageFlagBits::Tracked)) { + UntrackImage(image, image_id); + } + UnregisterImage(image_id); + DeleteImage(image_id); + return false; + }; + lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up); } template <class P> void TextureCache<P>::TickFrame() { - if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { + if (total_used_memory > minimum_memory) { RunGarbageCollector(); } sentenced_images.Tick(); @@ -1078,6 +1043,8 @@ void TextureCache<P>::RegisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory += Common::AlignUp(tentative_size, 1024); + image.lru_index = lru_cache.Insert(image_id, frame_tick); + ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); if (False(image.flags & ImageFlagBits::Sparse)) { @@ -1115,6 +1082,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory -= Common::AlignUp(tentative_size, 1024); + lru_cache.Free(image.lru_index); const auto& clear_page_table = [this, image_id]( u64 page, @@ -1384,7 +1352,7 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool if (is_modification) { MarkModification(image); } - image.frame_tick = frame_tick; + lru_cache.Touch(image.lru_index, frame_tick); } template <class P> diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index e4ae351cb..d7528ed24 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -14,6 +14,7 @@ #include "common/common_types.h" #include "common/literals.h" +#include "common/lru_cache.h" #include "video_core/compatible_formats.h" #include "video_core/delayed_destruction_ring.h" #include "video_core/engines/fermi_2d.h" @@ -370,6 +371,12 @@ private: std::vector<ImageId> uncommitted_downloads; std::queue<std::vector<ImageId>> committed_downloads; + struct LRUItemParams { + using ObjectType = ImageId; + using TickType = u64; + }; + Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache; + static constexpr size_t TICKS_TO_DESTROY = 6; DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images; DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view; @@ -379,7 +386,6 @@ private: u64 modification_tick = 0; u64 frame_tick = 0; - typename SlotVector<Image>::Iterator deletion_iterator; }; } // namespace VideoCommon diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp index c010b9353..24e943e4c 100644 --- a/src/video_core/textures/decoders.cpp +++ b/src/video_core/textures/decoders.cpp @@ -63,14 +63,6 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 const u32 unswizzled_offset = slice * pitch * height + line * pitch + column * BYTES_PER_PIXEL; - if (const auto offset = (TO_LINEAR ? unswizzled_offset : swizzled_offset); - offset >= input.size()) { - // TODO(Rodrigo): This is an out of bounds access that should never happen. To - // avoid crashing the emulator, break. - ASSERT_MSG(false, "offset {} exceeds input size {}!", offset, input.size()); - break; - } - u8* const dst = &output[TO_LINEAR ? swizzled_offset : unswizzled_offset]; const u8* const src = &input[TO_LINEAR ? unswizzled_offset : swizzled_offset]; |