diff options
Diffstat (limited to '')
-rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.h | 2 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.cpp | 8 | ||||
-rw-r--r-- | src/video_core/renderer_vulkan/vk_texture_cache.h | 6 | ||||
-rw-r--r-- | src/video_core/texture_cache/texture_cache.h | 104 | ||||
-rw-r--r-- | src/video_core/texture_cache/texture_cache_base.h | 6 |
5 files changed, 91 insertions, 35 deletions
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 113528e9b..5d9d370f2 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -354,6 +354,7 @@ struct TextureCacheParams { static constexpr bool FRAMEBUFFER_BLITS = true; static constexpr bool HAS_EMULATED_COPIES = true; static constexpr bool HAS_DEVICE_MEMORY_INFO = true; + static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false; using Runtime = OpenGL::TextureCacheRuntime; using Image = OpenGL::Image; @@ -361,6 +362,7 @@ struct TextureCacheParams { using ImageView = OpenGL::ImageView; using Sampler = OpenGL::Sampler; using Framebuffer = OpenGL::Framebuffer; + using AsyncBuffer = u32; }; using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index a65bbeb1c..d39372ec4 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -812,8 +812,12 @@ StagingBufferRef TextureCacheRuntime::UploadStagingBuffer(size_t size) { return staging_buffer_pool.Request(size, MemoryUsage::Upload); } -StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size) { - return staging_buffer_pool.Request(size, MemoryUsage::Download); +StagingBufferRef TextureCacheRuntime::DownloadStagingBuffer(size_t size, bool deferred) { + return staging_buffer_pool.Request(size, MemoryUsage::Download, deferred); +} + +void TextureCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) { + staging_buffer_pool.FreeDeferred(ref); } bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 7ec0df134..1f27a3589 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -51,7 +51,9 @@ public: StagingBufferRef UploadStagingBuffer(size_t size); - StagingBufferRef DownloadStagingBuffer(size_t size); + StagingBufferRef DownloadStagingBuffer(size_t size, bool deferred = false); + + void FreeDeferredStagingBuffer(StagingBufferRef& ref); void TickFrame(); @@ -347,6 +349,7 @@ struct TextureCacheParams { static constexpr bool FRAMEBUFFER_BLITS = false; static constexpr bool HAS_EMULATED_COPIES = false; static constexpr bool HAS_DEVICE_MEMORY_INFO = true; + static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; using Runtime = Vulkan::TextureCacheRuntime; using Image = Vulkan::Image; @@ -354,6 +357,7 @@ struct TextureCacheParams { using ImageView = Vulkan::ImageView; using Sampler = Vulkan::Sampler; using Framebuffer = Vulkan::Framebuffer; + using AsyncBuffer = Vulkan::StagingBufferRef; }; using TextureCache = VideoCommon::TextureCache<TextureCacheParams>; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 7fe451b5a..87152c8e9 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -646,7 +646,28 @@ bool TextureCache<P>::ShouldWaitAsyncFlushes() const noexcept { template <class P> void TextureCache<P>::CommitAsyncFlushes() { // This is intentionally passing the value by copy - committed_downloads.push(uncommitted_downloads); + if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { + const std::span<const ImageId> download_ids = uncommitted_downloads; + if (download_ids.empty()) { + committed_downloads.emplace_back(std::move(uncommitted_downloads)); + uncommitted_downloads.clear(); + async_buffers.emplace_back(std::optional<AsyncBuffer>{}); + return; + } + size_t total_size_bytes = 0; + for (const ImageId image_id : download_ids) { + total_size_bytes += slot_images[image_id].unswizzled_size_bytes; + } + auto download_map = runtime.DownloadStagingBuffer(total_size_bytes, true); + for (const ImageId image_id : download_ids) { + Image& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(download_map, copies); + download_map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64); + } + async_buffers.emplace_back(download_map); + } + committed_downloads.emplace_back(std::move(uncommitted_downloads)); uncommitted_downloads.clear(); } @@ -655,37 +676,58 @@ void TextureCache<P>::PopAsyncFlushes() { if (committed_downloads.empty()) { return; } - const std::span<const ImageId> download_ids = committed_downloads.front(); - if (download_ids.empty()) { - committed_downloads.pop(); - return; - } - size_t total_size_bytes = 0; - for (const ImageId image_id : download_ids) { - total_size_bytes += slot_images[image_id].unswizzled_size_bytes; - } - auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); - const size_t original_offset = download_map.offset; - for (const ImageId image_id : download_ids) { - Image& image = slot_images[image_id]; - const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(download_map, copies); - download_map.offset += image.unswizzled_size_bytes; - } - // Wait for downloads to finish - runtime.Finish(); - - download_map.offset = original_offset; - std::span<u8> download_span = download_map.mapped_span; - for (const ImageId image_id : download_ids) { - const ImageBase& image = slot_images[image_id]; - const auto copies = FullDownloadCopies(image.info); - SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, - swizzle_data_buffer); - download_map.offset += image.unswizzled_size_bytes; - download_span = download_span.subspan(image.unswizzled_size_bytes); + if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { + const std::span<const ImageId> download_ids = committed_downloads.front(); + if (download_ids.empty()) { + committed_downloads.pop_front(); + async_buffers.pop_front(); + return; + } + auto download_map = *async_buffers.front(); + std::span<u8> download_span = download_map.mapped_span; + for (size_t i = download_ids.size(); i > 0; i--) { + const ImageBase& image = slot_images[download_ids[i - 1]]; + const auto copies = FullDownloadCopies(image.info); + download_map.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64); + std::span<u8> download_span_alt = download_span.subspan(download_map.offset); + SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span_alt, + swizzle_data_buffer); + } + runtime.FreeDeferredStagingBuffer(download_map); + committed_downloads.pop_front(); + async_buffers.pop_front(); + } else { + const std::span<const ImageId> download_ids = committed_downloads.front(); + if (download_ids.empty()) { + committed_downloads.pop_front(); + return; + } + size_t total_size_bytes = 0; + for (const ImageId image_id : download_ids) { + total_size_bytes += slot_images[image_id].unswizzled_size_bytes; + } + auto download_map = runtime.DownloadStagingBuffer(total_size_bytes); + const size_t original_offset = download_map.offset; + for (const ImageId image_id : download_ids) { + Image& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(download_map, copies); + download_map.offset += image.unswizzled_size_bytes; + } + // Wait for downloads to finish + runtime.Finish(); + download_map.offset = original_offset; + std::span<u8> download_span = download_map.mapped_span; + for (const ImageId image_id : download_ids) { + const ImageBase& image = slot_images[image_id]; + const auto copies = FullDownloadCopies(image.info); + SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span, + swizzle_data_buffer); + download_map.offset += image.unswizzled_size_bytes; + download_span = download_span.subspan(image.unswizzled_size_bytes); + } + committed_downloads.pop_front(); } - committed_downloads.pop(); } template <class P> diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 6b2898705..4eea1f609 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -92,6 +92,8 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; /// True when the API can provide info about the memory of the device. static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; + /// True when the API can do asynchronous texture downloads. + static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS; static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()}; @@ -106,6 +108,7 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelI using ImageView = typename P::ImageView; using Sampler = typename P::Sampler; using Framebuffer = typename P::Framebuffer; + using AsyncBuffer = typename P::AsyncBuffer; struct BlitImages { ImageId dst_id; @@ -403,7 +406,8 @@ private: // TODO: This data structure is not optimal and it should be reworked std::vector<ImageId> uncommitted_downloads; - std::queue<std::vector<ImageId>> committed_downloads; + std::deque<std::vector<ImageId>> committed_downloads; + std::deque<std::optional<AsyncBuffer>> async_buffers; struct LRUItemParams { using ObjectType = ImageId; |