diff options
author | bunnei <bunneidev@gmail.com> | 2021-07-25 20:39:04 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-07-25 20:39:04 +0200 |
commit | 98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f (patch) | |
tree | 816faa96c2c4d291825063433331a8ea4b3d08f1 /src/video_core/buffer_cache | |
parent | Merge pull request #6699 from lat9nq/common-threads (diff) | |
parent | shader: Support out of bound local memory reads and immediate writes (diff) | |
download | yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.gz yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.bz2 yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.lz yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.xz yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.tar.zst yuzu-98b26b6e126d4775fdf3f773fe8a8ac808a8ff8f.zip |
Diffstat (limited to 'src/video_core/buffer_cache')
-rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h | 269 |
1 files changed, 237 insertions, 32 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 5a0b6f0c0..24c858104 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -31,6 +31,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" +#include "video_core/surface.h" #include "video_core/texture_cache/slot_vector.h" #include "video_core/texture_cache/types.h" @@ -42,14 +43,19 @@ MICROPROFILE_DECLARE(GPU_DownloadMemory); using BufferId = SlotId; +using VideoCore::Surface::PixelFormat; +using namespace Common::Literals; + constexpr u32 NUM_VERTEX_BUFFERS = 32; constexpr u32 NUM_TRANSFORM_FEEDBACK_BUFFERS = 4; constexpr u32 NUM_GRAPHICS_UNIFORM_BUFFERS = 18; constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8; constexpr u32 NUM_STORAGE_BUFFERS = 16; +constexpr u32 NUM_TEXTURE_BUFFERS = 16; constexpr u32 NUM_STAGES = 5; -using namespace Common::Literals; +using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>; +using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>; template <typename P> class BufferCache { @@ -67,6 +73,7 @@ class BufferCache { static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; + static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; static constexpr BufferId NULL_BUFFER_ID{0}; @@ -96,6 +103,10 @@ class BufferCache { BufferId buffer_id; }; + struct TextureBufferBinding : Binding { + PixelFormat format; + }; + static constexpr Binding NULL_BINDING{ .cpu_addr = 0, .size = 0, @@ -133,20 +144,31 @@ public: void BindHostComputeBuffers(); - void SetEnabledUniformBuffers(size_t stage, u32 enabled); + void SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask, + const UniformBufferSizes* sizes); - void SetEnabledComputeUniformBuffers(u32 enabled); + void SetComputeUniformBufferState(u32 mask, const ComputeUniformBufferSizes* sizes); void UnbindGraphicsStorageBuffers(size_t stage); void BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written); + void UnbindGraphicsTextureBuffers(size_t stage); + + void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, + PixelFormat format, bool is_written, bool is_image); + void UnbindComputeStorageBuffers(); void BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, u32 cbuf_offset, bool is_written); + void UnbindComputeTextureBuffers(); + + void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, + bool is_written, bool is_image); + void FlushCachedWrites(); /// Return true when there are uncommitted buffers to be downloaded @@ -178,6 +200,7 @@ public: [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); std::mutex mutex; + Runtime& runtime; private: template <typename Func> @@ -254,12 +277,16 @@ private: void BindHostGraphicsStorageBuffers(size_t stage); + void BindHostGraphicsTextureBuffers(size_t stage); + void BindHostTransformFeedbackBuffers(); void BindHostComputeUniformBuffers(); void BindHostComputeStorageBuffers(); + void BindHostComputeTextureBuffers(); + void DoUpdateGraphicsBuffers(bool is_indexed); void DoUpdateComputeBuffers(); @@ -274,6 +301,8 @@ private: void UpdateStorageBuffers(size_t stage); + void UpdateTextureBuffers(size_t stage); + void UpdateTransformFeedbackBuffers(); void UpdateTransformFeedbackBuffer(u32 index); @@ -282,6 +311,8 @@ private: void UpdateComputeStorageBuffers(); + void UpdateComputeTextureBuffers(); + void MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 size); [[nodiscard]] BufferId FindBuffer(VAddr cpu_addr, u32 size); @@ -323,6 +354,9 @@ private: [[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const; + [[nodiscard]] TextureBufferBinding GetTextureBufferBinding(GPUVAddr gpu_addr, u32 size, + PixelFormat format); + [[nodiscard]] std::span<const u8> ImmediateBufferWithData(VAddr cpu_addr, size_t size); [[nodiscard]] std::span<u8> ImmediateBuffer(size_t wanted_capacity); @@ -336,7 +370,6 @@ private: Tegra::Engines::KeplerCompute& kepler_compute; Tegra::MemoryManager& gpu_memory; Core::Memory::Memory& cpu_memory; - Runtime& runtime; SlotVector<Buffer> slot_buffers; DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; @@ -347,20 +380,30 @@ private: std::array<Binding, NUM_VERTEX_BUFFERS> vertex_buffers; std::array<std::array<Binding, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES> uniform_buffers; std::array<std::array<Binding, NUM_STORAGE_BUFFERS>, NUM_STAGES> storage_buffers; + std::array<std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS>, NUM_STAGES> texture_buffers; std::array<Binding, NUM_TRANSFORM_FEEDBACK_BUFFERS> transform_feedback_buffers; std::array<Binding, NUM_COMPUTE_UNIFORM_BUFFERS> compute_uniform_buffers; std::array<Binding, NUM_STORAGE_BUFFERS> compute_storage_buffers; + std::array<TextureBufferBinding, NUM_TEXTURE_BUFFERS> compute_texture_buffers; + + std::array<u32, NUM_STAGES> enabled_uniform_buffer_masks{}; + u32 enabled_compute_uniform_buffer_mask = 0; - std::array<u32, NUM_STAGES> enabled_uniform_buffers{}; - u32 enabled_compute_uniform_buffers = 0; + const UniformBufferSizes* uniform_buffer_sizes{}; + const ComputeUniformBufferSizes* compute_uniform_buffer_sizes{}; std::array<u32, NUM_STAGES> enabled_storage_buffers{}; std::array<u32, NUM_STAGES> written_storage_buffers{}; u32 enabled_compute_storage_buffers = 0; u32 written_compute_storage_buffers = 0; - std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{}; + std::array<u32, NUM_STAGES> enabled_texture_buffers{}; + std::array<u32, NUM_STAGES> written_texture_buffers{}; + std::array<u32, NUM_STAGES> image_texture_buffers{}; + u32 enabled_compute_texture_buffers = 0; + u32 written_compute_texture_buffers = 0; + u32 image_compute_texture_buffers = 0; std::array<u32, 16> uniform_cache_hits{}; std::array<u32, 16> uniform_cache_shots{}; @@ -371,6 +414,10 @@ private: std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty> dirty_uniform_buffers{}; + std::conditional_t<IS_OPENGL, std::array<u32, NUM_STAGES>, Empty> fast_bound_uniform_buffers{}; + std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, + std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>, Empty> + uniform_buffer_binding_sizes{}; std::vector<BufferId> cached_write_buffer_ids; @@ -394,8 +441,8 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, Runtime& runtime_) - : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, - gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { + : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, + kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { // Ensure the first slot is used for the null buffer void(slot_buffers.insert(runtime, NullBufferParams{})); deletion_iterator = slot_buffers.end(); @@ -615,6 +662,7 @@ void BufferCache<P>::BindHostStageBuffers(size_t stage) { MICROPROFILE_SCOPE(GPU_BindUploadBuffers); BindHostGraphicsUniformBuffers(stage); BindHostGraphicsStorageBuffers(stage); + BindHostGraphicsTextureBuffers(stage); } template <class P> @@ -622,21 +670,30 @@ void BufferCache<P>::BindHostComputeBuffers() { MICROPROFILE_SCOPE(GPU_BindUploadBuffers); BindHostComputeUniformBuffers(); BindHostComputeStorageBuffers(); + BindHostComputeTextureBuffers(); } template <class P> -void BufferCache<P>::SetEnabledUniformBuffers(size_t stage, u32 enabled) { +void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask, + const UniformBufferSizes* sizes) { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { - if (enabled_uniform_buffers[stage] != enabled) { - dirty_uniform_buffers[stage] = ~u32{0}; + if (enabled_uniform_buffer_masks != mask) { + if constexpr (IS_OPENGL) { + fast_bound_uniform_buffers.fill(0); + } + dirty_uniform_buffers.fill(~u32{0}); + uniform_buffer_binding_sizes.fill({}); } } - enabled_uniform_buffers[stage] = enabled; + enabled_uniform_buffer_masks = mask; + uniform_buffer_sizes = sizes; } template <class P> -void BufferCache<P>::SetEnabledComputeUniformBuffers(u32 enabled) { - enabled_compute_uniform_buffers = enabled; +void BufferCache<P>::SetComputeUniformBufferState(u32 mask, + const ComputeUniformBufferSizes* sizes) { + enabled_compute_uniform_buffer_mask = mask; + compute_uniform_buffer_sizes = sizes; } template <class P> @@ -657,9 +714,29 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index, } template <class P> +void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) { + enabled_texture_buffers[stage] = 0; + written_texture_buffers[stage] = 0; + image_texture_buffers[stage] = 0; +} + +template <class P> +void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, + u32 size, PixelFormat format, bool is_written, + bool is_image) { + enabled_texture_buffers[stage] |= 1U << tbo_index; + written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index; + } + texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); +} + +template <class P> void BufferCache<P>::UnbindComputeStorageBuffers() { enabled_compute_storage_buffers = 0; written_compute_storage_buffers = 0; + image_compute_texture_buffers = 0; } template <class P> @@ -677,6 +754,24 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index, } template <class P> +void BufferCache<P>::UnbindComputeTextureBuffers() { + enabled_compute_texture_buffers = 0; + written_compute_texture_buffers = 0; + image_compute_texture_buffers = 0; +} + +template <class P> +void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, + PixelFormat format, bool is_written, bool is_image) { + enabled_compute_texture_buffers |= 1U << tbo_index; + written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index; + } + compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); +} + +template <class P> void BufferCache<P>::FlushCachedWrites() { for (const BufferId buffer_id : cached_write_buffer_ids) { slot_buffers[buffer_id].FlushCachedWrites(); @@ -901,7 +996,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffers(size_t stage) { dirty = std::exchange(dirty_uniform_buffers[stage], 0); } u32 binding_index = 0; - ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { + ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { const bool needs_bind = ((dirty >> index) & 1) != 0; BindHostGraphicsUniformBuffer(stage, index, binding_index, needs_bind); if constexpr (NEEDS_BIND_UNIFORM_INDEX) { @@ -915,7 +1010,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 bool needs_bind) { const Binding& binding = uniform_buffers[stage][index]; const VAddr cpu_addr = binding.cpu_addr; - const u32 size = binding.size; + const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]); Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && @@ -925,8 +1020,13 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 if constexpr (IS_OPENGL) { if (runtime.HasFastBufferSubData()) { // Fast path for Nvidia - if (!HasFastUniformBufferBound(stage, binding_index)) { + const bool should_fast_bind = + !HasFastUniformBufferBound(stage, binding_index) || + uniform_buffer_binding_sizes[stage][binding_index] != size; + if (should_fast_bind) { // We only have to bind when the currently bound buffer is not the fast version + fast_bound_uniform_buffers[stage] |= 1U << binding_index; + uniform_buffer_binding_sizes[stage][binding_index] = size; runtime.BindFastUniformBuffer(stage, binding_index, size); } const auto span = ImmediateBufferWithData(cpu_addr, size); @@ -934,8 +1034,10 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 return; } } - fast_bound_uniform_buffers[stage] |= 1U << binding_index; - + if constexpr (IS_OPENGL) { + fast_bound_uniform_buffers[stage] |= 1U << binding_index; + uniform_buffer_binding_sizes[stage][binding_index] = size; + } // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size); cpu_memory.ReadBlockUnsafe(cpu_addr, span.data(), size); @@ -948,14 +1050,27 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 } ++uniform_cache_shots[0]; - if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) { - // Skip binding if it's not needed and if the bound buffer is not the fast version - // This exists to avoid instances where the fast buffer is bound and a GPU write happens + // Skip binding if it's not needed and if the bound buffer is not the fast version + // This exists to avoid instances where the fast buffer is bound and a GPU write happens + needs_bind |= HasFastUniformBufferBound(stage, binding_index); + if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { + needs_bind |= uniform_buffer_binding_sizes[stage][binding_index] != size; + } + if (!needs_bind) { return; } - fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); - const u32 offset = buffer.Offset(cpu_addr); + if constexpr (IS_OPENGL) { + // Fast buffer will be unbound + fast_bound_uniform_buffers[stage] &= ~(1U << binding_index); + + // Mark the index as dirty if offset doesn't match + const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset(); + dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index; + } + if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { + uniform_buffer_binding_sizes[stage][binding_index] = size; + } if constexpr (NEEDS_BIND_UNIFORM_INDEX) { runtime.BindUniformBuffer(stage, binding_index, buffer, offset, size); } else { @@ -985,6 +1100,28 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { } template <class P> +void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { + ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { + const TextureBufferBinding& binding = texture_buffers[stage][index]; + Buffer& buffer = slot_buffers[binding.buffer_id]; + const u32 size = binding.size; + SynchronizeBuffer(buffer, binding.cpu_addr, size); + + const u32 offset = buffer.Offset(binding.cpu_addr); + const PixelFormat format = binding.format; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + if (((image_texture_buffers[stage] >> index) & 1) != 0) { + runtime.BindImageBuffer(buffer, offset, size, format); + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + }); +} + +template <class P> void BufferCache<P>::BindHostTransformFeedbackBuffers() { if (maxwell3d.regs.tfb_enabled == 0) { return; @@ -1006,13 +1143,14 @@ void BufferCache<P>::BindHostComputeUniformBuffers() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { // Mark all uniform buffers as dirty dirty_uniform_buffers.fill(~u32{0}); + fast_bound_uniform_buffers.fill(0); } u32 binding_index = 0; - ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { + ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { const Binding& binding = compute_uniform_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; TouchBuffer(buffer); - const u32 size = binding.size; + const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]); SynchronizeBuffer(buffer, binding.cpu_addr, size); const u32 offset = buffer.Offset(binding.cpu_addr); @@ -1047,6 +1185,28 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { } template <class P> +void BufferCache<P>::BindHostComputeTextureBuffers() { + ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { + const TextureBufferBinding& binding = compute_texture_buffers[index]; + Buffer& buffer = slot_buffers[binding.buffer_id]; + const u32 size = binding.size; + SynchronizeBuffer(buffer, binding.cpu_addr, size); + + const u32 offset = buffer.Offset(binding.cpu_addr); + const PixelFormat format = binding.format; + if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { + if (((image_compute_texture_buffers >> index) & 1) != 0) { + runtime.BindImageBuffer(buffer, offset, size, format); + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + } else { + runtime.BindTextureBuffer(buffer, offset, size, format); + } + }); +} + +template <class P> void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { if (is_indexed) { UpdateIndexBuffer(); @@ -1056,6 +1216,7 @@ void BufferCache<P>::DoUpdateGraphicsBuffers(bool is_indexed) { for (size_t stage = 0; stage < NUM_STAGES; ++stage) { UpdateUniformBuffers(stage); UpdateStorageBuffers(stage); + UpdateTextureBuffers(stage); } } @@ -1063,6 +1224,7 @@ template <class P> void BufferCache<P>::DoUpdateComputeBuffers() { UpdateComputeUniformBuffers(); UpdateComputeStorageBuffers(); + UpdateComputeTextureBuffers(); } template <class P> @@ -1132,7 +1294,7 @@ void BufferCache<P>::UpdateVertexBuffer(u32 index) { template <class P> void BufferCache<P>::UpdateUniformBuffers(size_t stage) { - ForEachEnabledBit(enabled_uniform_buffers[stage], [&](u32 index) { + ForEachEnabledBit(enabled_uniform_buffer_masks[stage], [&](u32 index) { Binding& binding = uniform_buffers[stage][index]; if (binding.buffer_id) { // Already updated @@ -1163,6 +1325,18 @@ void BufferCache<P>::UpdateStorageBuffers(size_t stage) { } template <class P> +void BufferCache<P>::UpdateTextureBuffers(size_t stage) { + ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { + Binding& binding = texture_buffers[stage][index]; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + // Mark buffer as written if needed + if (((written_texture_buffers[stage] >> index) & 1) != 0) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); + } + }); +} + +template <class P> void BufferCache<P>::UpdateTransformFeedbackBuffers() { if (maxwell3d.regs.tfb_enabled == 0) { return; @@ -1193,7 +1367,7 @@ void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) { template <class P> void BufferCache<P>::UpdateComputeUniformBuffers() { - ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { + ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) { Binding& binding = compute_uniform_buffers[index]; binding = NULL_BINDING; const auto& launch_desc = kepler_compute.launch_description; @@ -1214,11 +1388,22 @@ void BufferCache<P>::UpdateComputeStorageBuffers() { ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { // Resolve buffer Binding& binding = compute_storage_buffers[index]; - const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); - binding.buffer_id = buffer_id; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); // Mark as written if needed if (((written_compute_storage_buffers >> index) & 1) != 0) { - MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size); + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); + } + }); +} + +template <class P> +void BufferCache<P>::UpdateComputeTextureBuffers() { + ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { + Binding& binding = compute_texture_buffers[index]; + binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); + // Mark as written if needed + if (((written_compute_texture_buffers >> index) & 1) != 0) { + MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); } }); } @@ -1551,6 +1736,7 @@ template <class P> void BufferCache<P>::NotifyBufferDeletion() { if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { dirty_uniform_buffers.fill(~u32{0}); + uniform_buffer_binding_sizes.fill({}); } auto& flags = maxwell3d.dirty.flags; flags[Dirty::IndexBuffer] = true; @@ -1578,6 +1764,25 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s } template <class P> +typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding( + GPUVAddr gpu_addr, u32 size, PixelFormat format) { + const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + TextureBufferBinding binding; + if (!cpu_addr || size == 0) { + binding.cpu_addr = 0; + binding.size = 0; + binding.buffer_id = NULL_BUFFER_ID; + binding.format = PixelFormat::Invalid; + } else { + binding.cpu_addr = *cpu_addr; + binding.size = size; + binding.buffer_id = BufferId{}; + binding.format = format; + } + return binding; +} + +template <class P> std::span<const u8> BufferCache<P>::ImmediateBufferWithData(VAddr cpu_addr, size_t size) { u8* const base_pointer = cpu_memory.GetPointer(cpu_addr); if (IsRangeGranular(cpu_addr, size) || |