diff options
Diffstat (limited to 'src/video_core')
37 files changed, 188 insertions, 341 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b57c0d4d4..83e7a1cde 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -29,10 +29,10 @@ namespace VideoCommon { using MapInterval = std::shared_ptr<MapIntervalBase>; -template <typename TBuffer, typename TBufferType, typename StreamBuffer> +template <typename OwnerBuffer, typename BufferType, typename StreamBuffer> class BufferCache { public: - using BufferInfo = std::pair<const TBufferType*, u64>; + using BufferInfo = std::pair<BufferType, u64>; BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, bool is_written = false, bool use_fast_cbuf = false) { @@ -89,9 +89,7 @@ public: } } - const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr)); - - return {ToHandle(block), offset}; + return {ToHandle(block), static_cast<u64>(block->GetOffset(cpu_addr))}; } /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. @@ -156,7 +154,7 @@ public: } } - virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0; + virtual BufferType GetEmptyBuffer(std::size_t size) = 0; protected: explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, @@ -166,19 +164,19 @@ protected: ~BufferCache() = default; - virtual const TBufferType* ToHandle(const TBuffer& storage) = 0; + virtual BufferType ToHandle(const OwnerBuffer& storage) = 0; virtual void WriteBarrier() = 0; - virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0; + virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0; - virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, + virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size, const u8* data) = 0; - virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, + virtual void DownloadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size, u8* data) = 0; - virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset, + virtual void CopyBlock(const OwnerBuffer& src, const OwnerBuffer& dst, std::size_t src_offset, std::size_t dst_offset, std::size_t size) = 0; virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { @@ -221,9 +219,8 @@ private: return std::make_shared<MapIntervalBase>(start, end, gpu_addr); } - MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr, + MapInterval MapAddress(const OwnerBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr, const std::size_t size) { - std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size); if (overlaps.empty()) { auto& memory_manager = system.GPU().MemoryManager(); @@ -272,7 +269,7 @@ private: return new_map; } - void UpdateBlock(const TBuffer& block, VAddr start, VAddr end, + void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end, std::vector<MapInterval>& overlaps) { const IntervalType base_interval{start, end}; IntervalSet interval_set{}; @@ -313,7 +310,7 @@ private: void FlushMap(MapInterval map) { std::size_t size = map->GetEnd() - map->GetStart(); - TBuffer block = blocks[map->GetStart() >> block_page_bits]; + OwnerBuffer block = blocks[map->GetStart() >> block_page_bits]; staging_buffer.resize(size); DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data()); system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size); @@ -328,7 +325,7 @@ private: buffer_ptr += size; buffer_offset += size; - return {&stream_buffer_handle, uploaded_offset}; + return {stream_buffer_handle, uploaded_offset}; } void AlignBuffer(std::size_t alignment) { @@ -338,11 +335,11 @@ private: buffer_offset = offset_aligned; } - TBuffer EnlargeBlock(TBuffer buffer) { + OwnerBuffer EnlargeBlock(OwnerBuffer buffer) { const std::size_t old_size = buffer->GetSize(); const std::size_t new_size = old_size + block_page_size; const VAddr cpu_addr = buffer->GetCpuAddr(); - TBuffer new_buffer = CreateBlock(cpu_addr, new_size); + OwnerBuffer new_buffer = CreateBlock(cpu_addr, new_size); CopyBlock(buffer, new_buffer, 0, 0, old_size); buffer->SetEpoch(epoch); pending_destruction.push_back(buffer); @@ -356,14 +353,14 @@ private: return new_buffer; } - TBuffer MergeBlocks(TBuffer first, TBuffer second) { + OwnerBuffer MergeBlocks(OwnerBuffer first, OwnerBuffer second) { const std::size_t size_1 = first->GetSize(); const std::size_t size_2 = second->GetSize(); const VAddr first_addr = first->GetCpuAddr(); const VAddr second_addr = second->GetCpuAddr(); const VAddr new_addr = std::min(first_addr, second_addr); const std::size_t new_size = size_1 + size_2; - TBuffer new_buffer = CreateBlock(new_addr, new_size); + OwnerBuffer new_buffer = CreateBlock(new_addr, new_size); CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2); first->SetEpoch(epoch); @@ -380,8 +377,8 @@ private: return new_buffer; } - TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) { - TBuffer found{}; + OwnerBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) { + OwnerBuffer found; const VAddr cpu_addr_end = cpu_addr + size - 1; u64 page_start = cpu_addr >> block_page_bits; const u64 page_end = cpu_addr_end >> block_page_bits; @@ -457,7 +454,7 @@ private: Core::System& system; std::unique_ptr<StreamBuffer> stream_buffer; - TBufferType stream_buffer_handle{}; + BufferType stream_buffer_handle{}; bool invalidated = false; @@ -475,9 +472,9 @@ private: static constexpr u64 block_page_bits = 21; static constexpr u64 block_page_size = 1ULL << block_page_bits; - std::unordered_map<u64, TBuffer> blocks; + std::unordered_map<u64, OwnerBuffer> blocks; - std::list<TBuffer> pending_destruction; + std::list<OwnerBuffer> pending_destruction; u64 epoch = 0; u64 modified_ticks = 0; diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 5e9cfba22..7231597d4 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1507,7 +1507,7 @@ union Instruction { TextureType GetTextureType() const { // The TLDS instruction has a weird encoding for the texture type. - if (texture_info >= 0 && texture_info <= 1) { + if (texture_info <= 1) { return TextureType::Texture1D; } if (texture_info == 2 || texture_info == 8 || texture_info == 12 || diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index a3389d0d2..fd49bc2a9 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -6,8 +6,8 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" +#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/process.h" -#include "core/hle/kernel/vm_manager.h" #include "core/memory.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" @@ -17,10 +17,7 @@ namespace Tegra { MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer}, system{system} { - std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); - std::fill(page_table.attributes.begin(), page_table.attributes.end(), - Common::PageType::Unmapped); - page_table.Resize(address_space_width); + page_table.Resize(address_space_width, page_bits, false); // Initialize the map with a single free region covering the entire managed space. VirtualMemoryArea initial_vma; @@ -55,9 +52,9 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) { MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr); ASSERT(system.CurrentProcess() - ->VMManager() - .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped, - Kernel::MemoryAttribute::DeviceMapped) + ->PageTable() + .SetMemoryAttribute(cpu_addr, size, Kernel::Memory::MemoryAttribute::DeviceShared, + Kernel::Memory::MemoryAttribute::DeviceShared) .IsSuccess()); return gpu_addr; @@ -70,9 +67,9 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr); ASSERT(system.CurrentProcess() - ->VMManager() - .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped, - Kernel::MemoryAttribute::DeviceMapped) + ->PageTable() + .SetMemoryAttribute(cpu_addr, size, Kernel::Memory::MemoryAttribute::DeviceShared, + Kernel::Memory::MemoryAttribute::DeviceShared) .IsSuccess()); return gpu_addr; } @@ -89,9 +86,10 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { UnmapRange(gpu_addr, aligned_size); ASSERT(system.CurrentProcess() - ->VMManager() - .SetMemoryAttribute(cpu_addr.value(), size, Kernel::MemoryAttribute::DeviceMapped, - Kernel::MemoryAttribute::None) + ->PageTable() + .SetMemoryAttribute(cpu_addr.value(), size, + Kernel::Memory::MemoryAttribute::DeviceShared, + Kernel::Memory::MemoryAttribute::None) .IsSuccess()); return gpu_addr; @@ -147,16 +145,8 @@ T MemoryManager::Read(GPUVAddr addr) const { return value; } - switch (page_table.attributes[addr >> page_bits]) { - case Common::PageType::Unmapped: - LOG_ERROR(HW_GPU, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, addr); - return 0; - case Common::PageType::Memory: - ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr); - break; - default: - UNREACHABLE(); - } + UNREACHABLE(); + return {}; } @@ -173,17 +163,7 @@ void MemoryManager::Write(GPUVAddr addr, T data) { return; } - switch (page_table.attributes[addr >> page_bits]) { - case Common::PageType::Unmapped: - LOG_ERROR(HW_GPU, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, - static_cast<u32>(data), addr); - return; - case Common::PageType::Memory: - ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr); - break; - default: - UNREACHABLE(); - } + UNREACHABLE(); } template u8 MemoryManager::Read<u8>(GPUVAddr addr) const; @@ -249,18 +229,11 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; - switch (page_table.attributes[page_index]) { - case Common::PageType::Memory: { - const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; - // Flush must happen on the rasterizer interface, such that memory is always synchronous - // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. - rasterizer.FlushRegion(src_addr, copy_amount); - memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); - break; - } - default: - UNREACHABLE(); - } + const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; + // Flush must happen on the rasterizer interface, such that memory is always synchronous + // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. + rasterizer.FlushRegion(src_addr, copy_amount); + memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); page_index++; page_offset = 0; @@ -305,18 +278,11 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; - switch (page_table.attributes[page_index]) { - case Common::PageType::Memory: { - const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; - // Invalidate must happen on the rasterizer interface, such that memory is always - // synchronous when it is written (even when in asynchronous GPU mode). - rasterizer.InvalidateRegion(dest_addr, copy_amount); - memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); - break; - } - default: - UNREACHABLE(); - } + const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; + // Invalidate must happen on the rasterizer interface, such that memory is always + // synchronous when it is written (even when in asynchronous GPU mode). + rasterizer.InvalidateRegion(dest_addr, copy_amount); + memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); page_index++; page_offset = 0; @@ -362,8 +328,8 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) { const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits]; - const std::size_t page = (addr & Memory::PAGE_MASK) + size; - return page <= Memory::PAGE_SIZE; + const std::size_t page = (addr & Core::Memory::PAGE_MASK) + size; + return page <= Core::Memory::PAGE_SIZE; } void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, @@ -375,12 +341,13 @@ void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageTy ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", base + page_table.pointers.size()); - std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type); - if (memory == nullptr) { - std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory); - std::fill(page_table.backing_addr.begin() + base, page_table.backing_addr.begin() + end, - backing_addr); + while (base != end) { + page_table.pointers[base] = nullptr; + page_table.backing_addr[base] = 0; + + base += 1; + } } else { while (base != end) { page_table.pointers[base] = memory; diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 0d9468535..0ddd52d5a 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -179,7 +179,7 @@ private: /// End of address space, based on address space in bits. static constexpr GPUVAddr address_space_end{1ULL << address_space_width}; - Common::BackingPageTable page_table{page_bits}; + Common::PageTable page_table; VMAMap vma_map; VideoCore::RasterizerInterface& rasterizer; diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index d01db97da..53622ca05 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -23,15 +23,15 @@ constexpr auto RangeFromInterval(Map& map, const Interval& interval) { } // Anonymous namespace -RasterizerAccelerated::RasterizerAccelerated(Memory::Memory& cpu_memory_) +RasterizerAccelerated::RasterizerAccelerated(Core::Memory::Memory& cpu_memory_) : cpu_memory{cpu_memory_} {} RasterizerAccelerated::~RasterizerAccelerated() = default; void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { std::lock_guard lock{pages_mutex}; - const u64 page_start{addr >> Memory::PAGE_BITS}; - const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; + const u64 page_start{addr >> Core::Memory::PAGE_BITS}; + const u64 page_end{(addr + size + Core::Memory::PAGE_SIZE - 1) >> Core::Memory::PAGE_BITS}; // Interval maps will erase segments if count reaches 0, so if delta is negative we have to // subtract after iterating @@ -44,8 +44,8 @@ void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int del const auto interval = pair.first & pages_interval; const int count = pair.second; - const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; - const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; + const VAddr interval_start_addr = boost::icl::first(interval) << Core::Memory::PAGE_BITS; + const VAddr interval_end_addr = boost::icl::last_next(interval) << Core::Memory::PAGE_BITS; const u64 interval_size = interval_end_addr - interval_start_addr; if (delta > 0 && count == delta) { diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h index 315798e7c..91866d7dd 100644 --- a/src/video_core/rasterizer_accelerated.h +++ b/src/video_core/rasterizer_accelerated.h @@ -11,7 +11,7 @@ #include "common/common_types.h" #include "video_core/rasterizer_interface.h" -namespace Memory { +namespace Core::Memory { class Memory; } @@ -20,7 +20,7 @@ namespace VideoCore { /// Implements the shared part in GPU accelerated rasterizers in RasterizerInterface. class RasterizerAccelerated : public RasterizerInterface { public: - explicit RasterizerAccelerated(Memory::Memory& cpu_memory_); + explicit RasterizerAccelerated(Core::Memory::Memory& cpu_memory_); ~RasterizerAccelerated() override; void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; @@ -30,7 +30,7 @@ private: CachedPageMap cached_pages; std::mutex pages_mutex; - Memory::Memory& cpu_memory; + Core::Memory::Memory& cpu_memory; }; } // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 4eb37a96c..cb5792407 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -55,33 +55,31 @@ void OGLBufferCache::WriteBarrier() { glMemoryBarrier(GL_ALL_BARRIER_BITS); } -const GLuint* OGLBufferCache::ToHandle(const Buffer& buffer) { +GLuint OGLBufferCache::ToHandle(const Buffer& buffer) { return buffer->GetHandle(); } -const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) { - static const GLuint null_buffer = 0; - return &null_buffer; +GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) { + return 0; } void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, const u8* data) { - glNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset), + glNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size), data); } void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, u8* data) { MICROPROFILE_SCOPE(OpenGL_Buffer_Download); - glGetNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset), + glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size), data); } void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, std::size_t dst_offset, std::size_t size) { - glCopyNamedBufferSubData(*src->GetHandle(), *dst->GetHandle(), - static_cast<GLintptr>(src_offset), static_cast<GLintptr>(dst_offset), - static_cast<GLsizeiptr>(size)); + glCopyNamedBufferSubData(src->GetHandle(), dst->GetHandle(), static_cast<GLintptr>(src_offset), + static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size)); } OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, @@ -89,7 +87,7 @@ OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_poi DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); const GLuint& cbuf = cbufs[cbuf_cursor++]; glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); - return {&cbuf, 0}; + return {cbuf, 0}; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index d94a11252..a74817857 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -34,12 +34,12 @@ public: explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size); ~CachedBufferBlock(); - const GLuint* GetHandle() const { - return &gl_buffer.handle; + GLuint GetHandle() const { + return gl_buffer.handle; } private: - OGLBuffer gl_buffer{}; + OGLBuffer gl_buffer; }; class OGLBufferCache final : public GenericBufferCache { @@ -48,7 +48,7 @@ public: const Device& device, std::size_t stream_size); ~OGLBufferCache(); - const GLuint* GetEmptyBuffer(std::size_t) override; + GLuint GetEmptyBuffer(std::size_t) override; void Acquire() noexcept { cbuf_cursor = 0; @@ -57,9 +57,9 @@ public: protected: Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; - void WriteBarrier() override; + GLuint ToHandle(const Buffer& buffer) override; - const GLuint* ToHandle(const Buffer& buffer) override; + void WriteBarrier() override; void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, const u8* data) override; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index c286502ba..d83dca25a 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -87,7 +87,7 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept { std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings; - static std::array<std::size_t, 5> stage_swizzle = {0, 1, 2, 3, 4}; + static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4}; const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS); const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS); const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS); diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index f12e9f55f..d7ba57aca 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -94,9 +94,9 @@ CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {} CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept { - VideoCommon::CachedQueryBase<HostCounter>::operator=(std::move(rhs)); cache = rhs.cache; type = rhs.type; + CachedQueryBase<HostCounter>::operator=(std::move(rhs)); return *this; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f4598fbf7..175374f0d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -188,10 +188,8 @@ void RasterizerOpenGL::SetupVertexBuffer() { ASSERT(end > start); const u64 size = end - start + 1; const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); - - // Bind the vertex array to the buffer at the current offset. - vertex_array_pushbuffer.SetVertexBuffer(static_cast<GLuint>(index), vertex_buffer, - vertex_buffer_offset, vertex_array.stride); + glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset, + vertex_array.stride); } } @@ -222,7 +220,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { const auto& regs = system.GPU().Maxwell3D().regs; const std::size_t size = CalculateIndexBufferSize(); const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); - vertex_array_pushbuffer.SetIndexBuffer(buffer); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer); return offset; } @@ -524,7 +522,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { // Prepare vertex array format. SetupVertexFormat(); - vertex_array_pushbuffer.Setup(); // Upload vertex and index data. SetupVertexBuffer(); @@ -534,17 +531,13 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { index_buffer_offset = SetupIndexBuffer(); } - // Prepare packed bindings. - bind_ubo_pushbuffer.Setup(); - bind_ssbo_pushbuffer.Setup(); - // Setup emulation uniform buffer. GLShader::MaxwellUniformData ubo; ubo.SetFromRegs(gpu); const auto [buffer, offset] = buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); - bind_ubo_pushbuffer.Push(EmulationUniformBlockBinding, buffer, offset, - static_cast<GLsizeiptr>(sizeof(ubo))); + glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset, + static_cast<GLsizeiptr>(sizeof(ubo))); // Setup shaders and their used resources. texture_cache.GuardSamplers(true); @@ -557,11 +550,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { // Signal the buffer cache that we are not going to upload more things. buffer_cache.Unmap(); - // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL. - vertex_array_pushbuffer.Bind(); - bind_ubo_pushbuffer.Bind(); - bind_ssbo_pushbuffer.Bind(); - program_manager.BindGraphicsPipeline(); if (texture_cache.TextureBarrier()) { @@ -630,17 +618,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); buffer_cache.Map(buffer_size); - bind_ubo_pushbuffer.Setup(); - bind_ssbo_pushbuffer.Setup(); - SetupComputeConstBuffers(kernel); SetupComputeGlobalMemory(kernel); buffer_cache.Unmap(); - bind_ubo_pushbuffer.Bind(); - bind_ssbo_pushbuffer.Bind(); - const auto& launch_desc = system.GPU().KeplerCompute().launch_description; glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); ++num_queued_commands; @@ -771,8 +753,8 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const const ConstBufferEntry& entry) { if (!buffer.enabled) { // Set values to zero to unbind buffers - bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, - sizeof(float)); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, + sizeof(float)); return; } @@ -783,7 +765,7 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const const auto alignment = device.GetUniformBufferAlignment(); const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, device.HasFastBufferSubData()); - bind_ubo_pushbuffer.Push(binding, cbuf, offset, size); + glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size); } void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { @@ -819,7 +801,8 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e const auto alignment{device.GetShaderStorageBufferAlignment()}; const auto [ssbo, buffer_offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten()); - bind_ssbo_pushbuffer.Push(binding, ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); + glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset, + static_cast<GLsizeiptr>(size)); } void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) { @@ -1432,7 +1415,7 @@ void RasterizerOpenGL::EndTransformFeedback() { const GPUVAddr gpu_addr = binding.Address(); const std::size_t size = binding.buffer_size; const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); - glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); + glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 435da4425..caea174d2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -231,9 +231,7 @@ private: static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; - VertexArrayPushBuffer vertex_array_pushbuffer{state_tracker}; - BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; - BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; + GLint vertex_binding = 0; std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> transform_feedback_buffers; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 12c6dcfde..6d2ff20f9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -34,8 +34,6 @@ namespace OpenGL { using Tegra::Engines::ShaderType; -using VideoCommon::Shader::CompileDepth; -using VideoCommon::Shader::CompilerSettings; using VideoCommon::Shader::ProgramCode; using VideoCommon::Shader::Registry; using VideoCommon::Shader::ShaderIR; @@ -45,7 +43,7 @@ namespace { constexpr u32 STAGE_MAIN_OFFSET = 10; constexpr u32 KERNEL_MAIN_OFFSET = 0; -constexpr CompilerSettings COMPILER_SETTINGS{CompileDepth::FullDecompile}; +constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; /// Gets the address for the specified shader stage program GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index b1804e9ea..22242cce9 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -835,7 +835,8 @@ private: void DeclareConstantBuffers() { u32 binding = device.GetBaseBindings(stage).uniform_buffer; - for (const auto& [index, cbuf] : ir.GetConstantBuffers()) { + for (const auto& buffers : ir.GetConstantBuffers()) { + const auto index = buffers.first; code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, GetConstBufferBlock(index)); code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); @@ -1144,6 +1145,7 @@ private: return {"gl_FragCoord"s + GetSwizzle(element), Type::Float}; default: UNREACHABLE(); + return {"0", Type::Int}; } case Attribute::Index::FrontColor: return {"gl_Color"s + GetSwizzle(element), Type::Float}; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 89f0e04ef..2c0c77c28 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -191,6 +191,7 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, case Tegra::Texture::TextureMipmapFilter::Linear: return GL_LINEAR_MIPMAP_LINEAR; } + break; } case Tegra::Texture::TextureFilter::Nearest: { switch (mip_filter_mode) { @@ -201,6 +202,7 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, case Tegra::Texture::TextureMipmapFilter::Linear: return GL_NEAREST_MIPMAP_LINEAR; } + break; } } LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}", static_cast<u32>(filter_mode)); diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index b751086fa..6d7bb16b2 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -14,68 +14,6 @@ namespace OpenGL { -struct VertexArrayPushBuffer::Entry { - GLuint binding_index{}; - const GLuint* buffer{}; - GLintptr offset{}; - GLsizei stride{}; -}; - -VertexArrayPushBuffer::VertexArrayPushBuffer(StateTracker& state_tracker) - : state_tracker{state_tracker} {} - -VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; - -void VertexArrayPushBuffer::Setup() { - index_buffer = nullptr; - vertex_buffers.clear(); -} - -void VertexArrayPushBuffer::SetIndexBuffer(const GLuint* buffer) { - index_buffer = buffer; -} - -void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* buffer, - GLintptr offset, GLsizei stride) { - vertex_buffers.push_back(Entry{binding_index, buffer, offset, stride}); -} - -void VertexArrayPushBuffer::Bind() { - if (index_buffer) { - state_tracker.BindIndexBuffer(*index_buffer); - } - - for (const auto& entry : vertex_buffers) { - glBindVertexBuffer(entry.binding_index, *entry.buffer, entry.offset, entry.stride); - } -} - -struct BindBuffersRangePushBuffer::Entry { - GLuint binding; - const GLuint* buffer; - GLintptr offset; - GLsizeiptr size; -}; - -BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} - -BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; - -void BindBuffersRangePushBuffer::Setup() { - entries.clear(); -} - -void BindBuffersRangePushBuffer::Push(GLuint binding, const GLuint* buffer, GLintptr offset, - GLsizeiptr size) { - entries.push_back(Entry{binding, buffer, offset, size}); -} - -void BindBuffersRangePushBuffer::Bind() { - for (const Entry& entry : entries) { - glBindBufferRange(target, entry.binding, *entry.buffer, entry.offset, entry.size); - } -} - void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { if (!GLAD_GL_KHR_debug) { // We don't need to throw an error as this is just for debugging diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index 47ee3177b..9c09ee12c 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -11,49 +11,6 @@ namespace OpenGL { -class StateTracker; - -class VertexArrayPushBuffer final { -public: - explicit VertexArrayPushBuffer(StateTracker& state_tracker); - ~VertexArrayPushBuffer(); - - void Setup(); - - void SetIndexBuffer(const GLuint* buffer); - - void SetVertexBuffer(GLuint binding_index, const GLuint* buffer, GLintptr offset, - GLsizei stride); - - void Bind(); - -private: - struct Entry; - - StateTracker& state_tracker; - - const GLuint* index_buffer{}; - std::vector<Entry> vertex_buffers; -}; - -class BindBuffersRangePushBuffer final { -public: - explicit BindBuffersRangePushBuffer(GLenum target); - ~BindBuffersRangePushBuffer(); - - void Setup(); - - void Push(GLuint binding, const GLuint* buffer, GLintptr offset, GLsizeiptr size); - - void Bind(); - -private: - struct Entry; - - GLenum target; - std::vector<Entry> entries; -}; - void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index dd590c38b..04532f8f8 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -42,7 +42,7 @@ #include <vulkan/vulkan_win32.h> #endif -#ifdef __linux__ +#if !defined(_WIN32) && !defined(__APPLE__) #include <X11/Xlib.h> #include <vulkan/vulkan_wayland.h> #include <vulkan/vulkan_xlib.h> @@ -119,7 +119,7 @@ vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatc extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); break; #endif -#ifdef __linux__ +#if !defined(_WIN32) && !defined(__APPLE__) case Core::Frontend::WindowSystemType::X11: extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME); break; @@ -345,7 +345,7 @@ bool RendererVulkan::CreateSurface() { } } #endif -#ifdef __linux__ +#if !defined(_WIN32) && !defined(__APPLE__) if (window_info.type == Core::Frontend::WindowSystemType::X11) { const VkXlibSurfaceCreateInfoKHR xlib_ci{ VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0, diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 0d167afbd..81e1de2be 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -74,18 +74,18 @@ Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) { return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size); } -const VkBuffer* VKBufferCache::ToHandle(const Buffer& buffer) { +VkBuffer VKBufferCache::ToHandle(const Buffer& buffer) { return buffer->GetHandle(); } -const VkBuffer* VKBufferCache::GetEmptyBuffer(std::size_t size) { +VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) { size = std::max(size, std::size_t(4)); const auto& empty = staging_pool.GetUnusedBuffer(size, false); scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) { cmdbuf.FillBuffer(buffer, 0, size, 0); }); - return empty.handle.address(); + return *empty.handle; } void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, @@ -94,7 +94,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st std::memcpy(staging.commit->Map(size), data, size); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, + scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset, size](vk::CommandBuffer cmdbuf) { cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size}); @@ -117,7 +117,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, u8* data) { const auto& staging = staging_pool.GetUnusedBuffer(size, true); scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, + scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset, size](vk::CommandBuffer cmdbuf) { VkBufferMemoryBarrier barrier; barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; @@ -144,7 +144,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, std::size_t dst_offset, std::size_t size) { scheduler.RequestOutsideRenderPassOperationContext(); - scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset, + scheduler.Record([src_buffer = src->GetHandle(), dst_buffer = dst->GetHandle(), src_offset, dst_offset, size](vk::CommandBuffer cmdbuf) { cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index d3c23da98..3cd2e2774 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -33,8 +33,8 @@ public: VAddr cpu_addr, std::size_t size); ~CachedBufferBlock(); - const VkBuffer* GetHandle() const { - return buffer.handle.address(); + VkBuffer GetHandle() const { + return *buffer.handle; } private: @@ -50,15 +50,15 @@ public: VKScheduler& scheduler, VKStagingBufferPool& staging_pool); ~VKBufferCache(); - const VkBuffer* GetEmptyBuffer(std::size_t size) override; + VkBuffer GetEmptyBuffer(std::size_t size) override; protected: + VkBuffer ToHandle(const Buffer& buffer) override; + void WriteBarrier() override {} Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; - const VkBuffer* ToHandle(const Buffer& buffer) override; - void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, const u8* data) override; diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 9d92305f4..878a78755 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -343,13 +343,13 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler, QuadArrayPass::~QuadArrayPass() = default; -std::pair<const VkBuffer*, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { +std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { const u32 num_triangle_vertices = num_vertices * 6 / 4; const std::size_t staging_size = num_triangle_vertices * sizeof(u32); auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); update_descriptor_queue.Acquire(); - update_descriptor_queue.AddBuffer(buffer.handle.address(), 0, staging_size); + update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size); const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); scheduler.RequestOutsideRenderPassOperationContext(); @@ -377,7 +377,7 @@ std::pair<const VkBuffer*, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertice cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {}); }); - return {buffer.handle.address(), 0}; + return {*buffer.handle, 0}; } Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler, @@ -391,14 +391,14 @@ Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler, Uint8Pass::~Uint8Pass() = default; -std::pair<const VkBuffer*, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, - u64 src_offset) { +std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, + u64 src_offset) { const auto staging_size = static_cast<u32>(num_vertices * sizeof(u16)); auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); update_descriptor_queue.Acquire(); - update_descriptor_queue.AddBuffer(&src_buffer, src_offset, num_vertices); - update_descriptor_queue.AddBuffer(buffer.handle.address(), 0, staging_size); + update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices); + update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size); const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); scheduler.RequestOutsideRenderPassOperationContext(); @@ -422,7 +422,7 @@ std::pair<const VkBuffer*, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer s cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); }); - return {buffer.handle.address(), 0}; + return {*buffer.handle, 0}; } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index c62516bff..ec80c8683 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -50,7 +50,7 @@ public: VKUpdateDescriptorQueue& update_descriptor_queue); ~QuadArrayPass(); - std::pair<const VkBuffer*, VkDeviceSize> Assemble(u32 num_vertices, u32 first); + std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first); private: VKScheduler& scheduler; @@ -65,7 +65,7 @@ public: VKUpdateDescriptorQueue& update_descriptor_queue); ~Uint8Pass(); - std::pair<const VkBuffer*, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset); + std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset); private: VKScheduler& scheduler; diff --git a/src/video_core/renderer_vulkan/vk_memory_manager.h b/src/video_core/renderer_vulkan/vk_memory_manager.h index 35ee54d30..5b6858e9b 100644 --- a/src/video_core/renderer_vulkan/vk_memory_manager.h +++ b/src/video_core/renderer_vulkan/vk_memory_manager.h @@ -32,7 +32,7 @@ public: * memory. When passing false, it will try to allocate device local memory. * @returns A memory commit. */ - VKMemoryCommit Commit(const VkMemoryRequirements& reqs, bool host_visible); + VKMemoryCommit Commit(const VkMemoryRequirements& requirements, bool host_visible); /// Commits memory required by the buffer and binds it. VKMemoryCommit Commit(const vk::Buffer& buffer, bool host_visible); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 774ba1f26..4ca0febb8 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -137,13 +137,13 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry class BufferBindings final { public: - void AddVertexBinding(const VkBuffer* buffer, VkDeviceSize offset) { - vertex.buffer_ptrs[vertex.num_buffers] = buffer; + void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset) { + vertex.buffers[vertex.num_buffers] = buffer; vertex.offsets[vertex.num_buffers] = offset; ++vertex.num_buffers; } - void SetIndexBinding(const VkBuffer* buffer, VkDeviceSize offset, VkIndexType type) { + void SetIndexBinding(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) { index.buffer = buffer; index.offset = offset; index.type = type; @@ -227,19 +227,19 @@ private: // Some of these fields are intentionally left uninitialized to avoid initializing them twice. struct { std::size_t num_buffers = 0; - std::array<const VkBuffer*, Maxwell::NumVertexArrays> buffer_ptrs; + std::array<VkBuffer, Maxwell::NumVertexArrays> buffers; std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets; } vertex; struct { - const VkBuffer* buffer = nullptr; + VkBuffer buffer = nullptr; VkDeviceSize offset; VkIndexType type; } index; template <std::size_t N> void BindStatic(VKScheduler& scheduler) const { - if (index.buffer != nullptr) { + if (index.buffer) { BindStatic<N, true>(scheduler); } else { BindStatic<N, false>(scheduler); @@ -254,18 +254,14 @@ private: } std::array<VkBuffer, N> buffers; - std::transform(vertex.buffer_ptrs.begin(), vertex.buffer_ptrs.begin() + N, buffers.begin(), - [](const auto ptr) { return *ptr; }); - std::array<VkDeviceSize, N> offsets; + std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin()); std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin()); if constexpr (is_indexed) { // Indexed draw - scheduler.Record([buffers, offsets, index_buffer = *index.buffer, - index_offset = index.offset, - index_type = index.type](vk::CommandBuffer cmdbuf) { - cmdbuf.BindIndexBuffer(index_buffer, index_offset, index_type); + scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) { + cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type); cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data()); }); } else { @@ -790,7 +786,7 @@ void RasterizerVulkan::BeginTransformFeedback() { const std::size_t size = binding.buffer_size; const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); - scheduler.Record([buffer = *buffer, offset = offset, size](vk::CommandBuffer cmdbuf) { + scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) { cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); }); @@ -870,7 +866,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar auto format = regs.index_array.format; const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) { - std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, *buffer, offset); + std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, buffer, offset); format = Maxwell::IndexFormat::UnsignedShort; } @@ -1007,8 +1003,8 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd const auto size = memory_manager.Read<u32>(address + 8); if (size == 0) { - // Sometimes global memory pointers don't have a proper size. Upload a dummy entry because - // Vulkan doesn't like empty buffers. + // Sometimes global memory pointers don't have a proper size. Upload a dummy entry + // because Vulkan doesn't like empty buffers. constexpr std::size_t dummy_size = 4; const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size); update_descriptor_queue.AddBuffer(buffer, 0, dummy_size); diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp index 4bfec0077..681ecde98 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.cpp +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.cpp @@ -35,12 +35,13 @@ void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template payload.clear(); } + // TODO(Rodrigo): Rework to write the payload directly const auto payload_start = payload.data() + payload.size(); for (const auto& entry : entries) { if (const auto image = std::get_if<VkDescriptorImageInfo>(&entry)) { payload.push_back(*image); - } else if (const auto buffer = std::get_if<Buffer>(&entry)) { - payload.emplace_back(*buffer->buffer, buffer->offset, buffer->size); + } else if (const auto buffer = std::get_if<VkDescriptorBufferInfo>(&entry)) { + payload.push_back(*buffer); } else if (const auto texel = std::get_if<VkBufferView>(&entry)) { payload.push_back(*texel); } else { diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index a9e3d5dba..6ba2c9997 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -18,12 +18,11 @@ class VKScheduler; class DescriptorUpdateEntry { public: - explicit DescriptorUpdateEntry() : image{} {} + explicit DescriptorUpdateEntry() {} DescriptorUpdateEntry(VkDescriptorImageInfo image) : image{image} {} - DescriptorUpdateEntry(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) - : buffer{buffer, offset, size} {} + DescriptorUpdateEntry(VkDescriptorBufferInfo buffer) : buffer{buffer} {} DescriptorUpdateEntry(VkBufferView texel_buffer) : texel_buffer{texel_buffer} {} @@ -54,8 +53,8 @@ public: entries.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); } - void AddBuffer(const VkBuffer* buffer, u64 offset, std::size_t size) { - entries.push_back(Buffer{buffer, offset, size}); + void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) { + entries.emplace_back(VkDescriptorBufferInfo{buffer, offset, size}); } void AddTexelBuffer(VkBufferView texel_buffer) { @@ -67,12 +66,7 @@ public: } private: - struct Buffer { - const VkBuffer* buffer = nullptr; - u64 offset = 0; - std::size_t size = 0; - }; - using Variant = std::variant<VkDescriptorImageInfo, Buffer, VkBufferView>; + using Variant = std::variant<VkDescriptorImageInfo, VkDescriptorBufferInfo, VkBufferView>; const VKDevice& device; VKScheduler& scheduler; diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index 2e2711350..6d313963a 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -484,17 +484,17 @@ bool TryInspectAddress(CFGRebuildState& state) { } case BlockCollision::Inside: { // This case is the tricky one: - // We need to Split the block in 2 sepparate blocks + // We need to split the block into 2 separate blocks const u32 end = state.block_info[block_index].end; BlockInfo& new_block = CreateBlockInfo(state, address, end); BlockInfo& current_block = state.block_info[block_index]; current_block.end = address - 1; - new_block.branch = current_block.branch; + new_block.branch = std::move(current_block.branch); BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>(); const auto branch = std::get_if<SingleBranch>(forward_branch.get()); branch->address = address; branch->ignore = true; - current_block.branch = forward_branch; + current_block.branch = std::move(forward_branch); return true; } default: diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 0dd7a1196..85ee9aa5e 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -352,8 +352,10 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value())); } else { const Node image_register = GetRegister(instr.gpr39); - const auto [base_image, buffer, offset] = TrackCbuf( - image_register, global_code, static_cast<s64>(global_code.size())); + const auto result = TrackCbuf(image_register, global_code, + static_cast<s64>(global_code.size())); + const auto buffer = std::get<1>(result); + const auto offset = std::get<2>(result); descriptor = registry.ObtainBindlessSampler(buffer, offset); } if (!descriptor) { @@ -497,9 +499,12 @@ Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType t Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { const Node image_register = GetRegister(reg); - const auto [base_image, buffer, offset] = + const auto result = TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); + const auto buffer = std::get<1>(result); + const auto offset = std::get<2>(result); + const auto it = std::find_if(std::begin(used_images), std::end(used_images), [buffer = buffer, offset = offset](const Image& entry) { diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp index 3b391d3e6..d4ffa8014 100644 --- a/src/video_core/shader/decode/shift.cpp +++ b/src/video_core/shader/decode/shift.cpp @@ -23,7 +23,6 @@ Node IsFull(Node shift) { } Node Shift(OperationCode opcode, Node value, Node shift) { - Node is_full = Operation(OperationCode::LogicalIEqual, shift, Immediate(32)); Node shifted = Operation(opcode, move(value), shift); return Operation(OperationCode::Select, IsFull(move(shift)), Immediate(0), move(shifted)); } diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 8852c8a1b..822674926 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -56,8 +56,7 @@ Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) { const auto index = static_cast<u32>(index_); const auto offset = static_cast<u32>(offset_); - const auto [entry, is_new] = used_cbufs.try_emplace(index); - entry->second.MarkAsUsed(offset); + used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset); return MakeNode<CbufNode>(index, Immediate(offset)); } @@ -66,8 +65,7 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { const auto index = static_cast<u32>(index_); const auto offset = static_cast<u32>(offset_); - const auto [entry, is_new] = used_cbufs.try_emplace(index); - entry->second.MarkAsUsedIndirect(); + used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect(); Node final_offset = [&] { // Attempt to inline constant buffer without a variable offset. This is done to allow @@ -166,6 +164,7 @@ Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signe std::move(value), Immediate(16)); value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, std::move(value), Immediate(16)); + return value; case Register::Size::Word: // Default - do nothing return value; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 10739b37d..513e9bf49 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -27,8 +27,9 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { const auto& conditional_code = conditional->GetCode(); - auto [found, internal_cursor] = FindOperation( + auto result = FindOperation( conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); + auto& found = result.first; if (found) { return {std::move(found), cursor}; } @@ -75,12 +76,13 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons s64 cursor) { if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { // Constant buffer found, test if it's an immediate - const auto offset = cbuf->GetOffset(); + const auto& offset = cbuf->GetOffset(); if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); return {tracked, track}; - } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { + } + if (const auto operation = std::get_if<OperationNode>(&*offset)) { const u32 bound_buffer = registry.GetBoundBuffer(); if (bound_buffer != cbuf->GetIndex()) { return {}; @@ -93,12 +95,12 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); const auto& gpu_driver = registry.AccessGuestDriverProfile(); const u32 bindless_cv = NewCustomVariable(); - const Node op = + Node op = Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize())); const Node cv_node = GetCustomVariable(bindless_cv); Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); - const std::size_t amend_index = DeclareAmend(amend_op); + const std::size_t amend_index = DeclareAmend(std::move(amend_op)); AmendNodeCv(amend_index, code[cursor]); // TODO Implement Bindless Index custom variable auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(), @@ -141,7 +143,7 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co s64 cursor) const { if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { // Constant buffer found, test if it's an immediate - const auto offset = cbuf->GetOffset(); + const auto& offset = cbuf->GetOffset(); if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { return {tracked, cbuf->GetIndex(), immediate->GetValue()}; } @@ -186,8 +188,8 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register // that it uses as operand - const auto [found, found_cursor] = - TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); + const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); + const auto& found = result.first; if (!found) { return {}; } diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 7af0e792c..715f39d0d 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -248,8 +248,14 @@ void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager, // Use an extra temporal buffer auto& tmp_buffer = staging_cache.GetBuffer(1); + // Special case for 3D Texture Segments + const bool must_read_current_data = + params.block_depth > 0 && params.target == VideoCore::Surface::SurfaceTarget::Texture2D; tmp_buffer.resize(guest_memory_size); host_ptr = tmp_buffer.data(); + if (must_read_current_data) { + memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size); + } if (params.is_tiled) { ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width); diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h index a39a8661b..c5ab21f56 100644 --- a/src/video_core/texture_cache/surface_base.h +++ b/src/video_core/texture_cache/surface_base.h @@ -72,9 +72,9 @@ public: return (cpu_addr < end) && (cpu_addr_end > start); } - bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { + bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) const { const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size; - return (gpu_addr <= other_start && other_end <= gpu_addr_end); + return gpu_addr <= other_start && other_end <= gpu_addr_end; } // Use only when recycling a surface diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 6f3ef45be..0de499946 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -167,7 +167,6 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) { const auto& regs = system.GPU().Maxwell3D().regs; - regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.type; SurfaceParams params; params.is_tiled = regs.zeta.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; diff --git a/src/video_core/texture_cache/surface_view.cpp b/src/video_core/texture_cache/surface_view.cpp index 57a1f5803..6b5f5984b 100644 --- a/src/video_core/texture_cache/surface_view.cpp +++ b/src/video_core/texture_cache/surface_view.cpp @@ -20,4 +20,8 @@ bool ViewParams::operator==(const ViewParams& rhs) const { std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels, rhs.target); } +bool ViewParams::operator!=(const ViewParams& rhs) const { + return !operator==(rhs); +} + } // namespace VideoCommon diff --git a/src/video_core/texture_cache/surface_view.h b/src/video_core/texture_cache/surface_view.h index b17fd11a9..90a8bb0ae 100644 --- a/src/video_core/texture_cache/surface_view.h +++ b/src/video_core/texture_cache/surface_view.h @@ -21,6 +21,7 @@ struct ViewParams { std::size_t Hash() const; bool operator==(const ViewParams& rhs) const; + bool operator!=(const ViewParams& rhs) const; bool IsLayered() const { switch (target) { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 4edd4313b..69ca08fd1 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -614,10 +614,10 @@ private: * textures within the GPU if possible. Falls back to LLE when it isn't possible to use any of * the HLE methods. * - * @param overlaps The overlapping surfaces registered in the cache. - * @param params The parameters on the new surface. - * @param gpu_addr The starting address of the new surface. - * @param cache_addr The starting address of the new surface on physical memory. + * @param overlaps The overlapping surfaces registered in the cache. + * @param params The parameters on the new surface. + * @param gpu_addr The starting address of the new surface. + * @param cpu_addr The starting address of the new surface on physical memory. */ std::optional<std::pair<TSurface, TView>> Manage3DSurfaces(std::vector<TSurface>& overlaps, const SurfaceParams& params, @@ -647,7 +647,8 @@ private: break; } const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); - const auto [x, y, z] = params.GetBlockOffsetXYZ(offset); + const auto offsets = params.GetBlockOffsetXYZ(offset); + const auto z = std::get<2>(offsets); modified |= surface->IsModified(); const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, 1); |