Diffstat (limited to 'src/video_core')
-rw-r--r-- | src/video_core/CMakeLists.txt                           |   1
-rw-r--r-- | src/video_core/buffer_cache/buffer_cache.h              | 227
-rw-r--r-- | src/video_core/buffer_cache/map_interval.cpp            |  33
-rw-r--r-- | src/video_core/buffer_cache/map_interval.h              | 133
-rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp      |   1
-rw-r--r-- | src/video_core/renderer_opengl/gl_fence_manager.cpp     |   1
-rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp |  23
-rw-r--r-- | src/video_core/renderer_vulkan/vk_buffer_cache.cpp      |   1
-rw-r--r-- | src/video_core/renderer_vulkan/vk_fence_manager.h       |   1
-rw-r--r-- | src/video_core/renderer_vulkan/vk_shader_decompiler.cpp |  23
-rw-r--r-- | src/video_core/shader/decode/other.cpp                  |  21
-rw-r--r-- | src/video_core/shader/node.h                            |   5
12 files changed, 287 insertions, 183 deletions
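
Editor's note on the buffer cache changes below: the patch drops the boost::icl::interval_map of std::shared_ptr<MapIntervalBase> and instead stores plain MapInterval nodes that carry a boost::intrusive::set hook and are recycled through a chunked MapIntervalAllocator (see map_interval.h further down). The standalone C++ sketch that follows only illustrates that pattern; the MapIntervalPool name, the 256-entry chunk size, and the main() driver are inventions of this note, not code from the patch.

// --- illustrative sketch, not part of the patch ---
#include <array>
#include <cstdint>
#include <iostream>
#include <memory>
#include <vector>

#include <boost/intrusive/set.hpp>
#include <boost/intrusive/set_hook.hpp>

using VAddr = std::uint64_t;

// A cache entry that owns its own tree hook, so linking it into the set never allocates.
struct MapInterval : boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> {
    MapInterval() = default;
    MapInterval(VAddr start_, VAddr end_) : start{start_}, end{end_} {}

    bool Overlaps(VAddr other_start, VAddr other_end) const noexcept {
        return start < other_end && other_start < end;
    }

    VAddr start = 0;
    VAddr end = 0;
};

struct MapIntervalCompare {
    bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept {
        return lhs.start < rhs.start;
    }
};

// Pool of intervals handed out from fixed-size chunks; released nodes go back to a
// free list instead of the heap, mirroring the patch's MapIntervalAllocator.
class MapIntervalPool {
public:
    MapIntervalPool() {
        AllocateChunk();
    }

    MapInterval* Allocate() {
        if (free_list.empty()) {
            AllocateChunk();
        }
        MapInterval* const interval = free_list.back();
        free_list.pop_back();
        return interval;
    }

    void Release(MapInterval* interval) {
        free_list.push_back(interval);
    }

private:
    struct Chunk {
        std::array<MapInterval, 256> data{};
    };

    void AllocateChunk() {
        chunks.push_back(std::make_unique<Chunk>());
        for (MapInterval& interval : chunks.back()->data) {
            free_list.push_back(&interval);
        }
    }

    std::vector<std::unique_ptr<Chunk>> chunks;
    std::vector<MapInterval*> free_list;
};

int main() {
    MapIntervalPool pool;
    boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>> cache;

    // Register two mappings, in the spirit of BufferCache::Register: take a node from
    // the pool, fill it, then link it into the intrusive set (no allocation here).
    MapInterval* const a = pool.Allocate();
    *a = MapInterval{0x1000, 0x2000};
    cache.insert(*a);

    MapInterval* const b = pool.Allocate();
    *b = MapInterval{0x3000, 0x4000};
    cache.insert(*b);

    // Range query in the style of GetMapsInRange: step one node back from the lower
    // bound so an interval that begins earlier but still overlaps is not missed.
    const VAddr addr = 0x1800;
    const VAddr addr_end = 0x3800;
    auto it = cache.lower_bound(MapInterval{addr, addr});
    if (it != cache.begin()) {
        --it;
    }
    for (; it != cache.end() && it->start < addr_end; ++it) {
        if (it->Overlaps(addr, addr_end)) {
            std::cout << std::hex << "overlap: [0x" << it->start << ", 0x" << it->end << ")\n";
        }
    }

    // Unregister: unlink from the set before handing nodes back to the pool.
    cache.clear();
    pool.Release(a);
    pool.Release(b);
}
// --- end of sketch ---

The point of the intrusive container is that registering or unregistering a mapping never touches the heap: the node already owns its tree hook, so insert and erase only relink pointers, and lookups can walk neighbours in address order the way the new GetMapsInRange does.
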
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index d23c53843..f00c71dae 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_library(video_core STATIC
     buffer_cache/buffer_block.h
     buffer_cache/buffer_cache.h
+    buffer_cache/map_interval.cpp
     buffer_cache/map_interval.h
     dirty_flags.cpp
     dirty_flags.h
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 56e570994..d9a4a1b4d 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -12,11 +12,12 @@
 #include <utility>
 #include <vector>

-#include <boost/icl/interval_map.hpp>
+#include <boost/container/small_vector.hpp>
 #include <boost/icl/interval_set.hpp>
-#include <boost/range/iterator_range.hpp>
+#include <boost/intrusive/set.hpp>

 #include "common/alignment.h"
+#include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "core/core.h"
@@ -29,10 +30,12 @@

 namespace VideoCommon {

-using MapInterval = std::shared_ptr<MapIntervalBase>;
-
 template <typename OwnerBuffer, typename BufferType, typename StreamBuffer>
 class BufferCache {
+    using IntervalSet = boost::icl::interval_set<VAddr>;
+    using IntervalType = typename IntervalSet::interval_type;
+    using VectorMapInterval = boost::container::small_vector<MapInterval*, 1>;
+
 public:
     using BufferInfo = std::pair<BufferType, u64>;

@@ -40,14 +43,12 @@ public:
                             bool is_written = false, bool use_fast_cbuf = false) {
         std::lock_guard lock{mutex};

-        const std::optional<VAddr> cpu_addr_opt =
-            system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
-
+        const auto& memory_manager = system.GPU().MemoryManager();
+        const std::optional<VAddr> cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr);
         if (!cpu_addr_opt) {
             return {GetEmptyBuffer(size), 0};
         }
-
-        VAddr cpu_addr = *cpu_addr_opt;
+        const VAddr cpu_addr = *cpu_addr_opt;

         // Cache management is a big overhead, so only cache entries with a given size.
         // TODO: Figure out which size is the best for given games.
@@ -77,16 +78,19 @@ public:
             }
         }

-        auto block = GetBlock(cpu_addr, size);
-        auto map = MapAddress(block, gpu_addr, cpu_addr, size);
+        OwnerBuffer block = GetBlock(cpu_addr, size);
+        MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size);
+        if (!map) {
+            return {GetEmptyBuffer(size), 0};
+        }
         if (is_written) {
             map->MarkAsModified(true, GetModifiedTicks());
             if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
                 MarkForAsyncFlush(map);
             }
-            if (!map->IsWritten()) {
-                map->MarkAsWritten(true);
-                MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
+            if (!map->is_written) {
+                map->is_written = true;
+                MarkRegionAsWritten(map->start, map->end - 1);
             }
         }

@@ -132,12 +136,11 @@ public:

     void FlushRegion(VAddr addr, std::size_t size) {
         std::lock_guard lock{mutex};
-        std::vector<MapInterval> objects = GetMapsInRange(addr, size);
-        std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) {
-            return a->GetModificationTick() < b->GetModificationTick();
-        });
-        for (auto& object : objects) {
-            if (object->IsModified() && object->IsRegistered()) {
+        VectorMapInterval objects = GetMapsInRange(addr, size);
+        std::sort(objects.begin(), objects.end(),
+                  [](MapInterval* lhs, MapInterval* rhs) { return lhs->ticks < rhs->ticks; });
+        for (MapInterval* object : objects) {
+            if (object->is_modified && object->is_registered) {
                 mutex.unlock();
                 FlushMap(object);
                 mutex.lock();
@@ -148,9 +151,9 @@ public:

     bool MustFlushRegion(VAddr addr, std::size_t size) {
         std::lock_guard lock{mutex};
-        const std::vector<MapInterval> objects = GetMapsInRange(addr, size);
-        return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval& map) {
-            return map->IsModified() && map->IsRegistered();
+        const VectorMapInterval objects = GetMapsInRange(addr, size);
+        return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval* map) {
+            return map->is_modified && map->is_registered;
         });
     }

@@ -158,9 +161,8 @@ public:

     void InvalidateRegion(VAddr addr, u64 size) {
         std::lock_guard lock{mutex};
-        std::vector<MapInterval> objects = GetMapsInRange(addr, size);
-        for (auto& object : objects) {
-            if (object->IsRegistered()) {
+        for (auto& object : GetMapsInRange(addr, size)) {
+            if (object->is_registered) {
                 Unregister(object);
             }
         }
@@ -169,10 +171,10 @@ public:

     void OnCPUWrite(VAddr addr, std::size_t size) {
         std::lock_guard lock{mutex};
-        for (const auto& object : GetMapsInRange(addr, size)) {
-            if (object->IsMemoryMarked() && object->IsRegistered()) {
+        for (MapInterval* object : GetMapsInRange(addr, size)) {
+            if (object->is_memory_marked && object->is_registered) {
                 UnmarkMemory(object);
-                object->SetSyncPending(true);
+                object->is_sync_pending = true;
                 marked_for_unregister.emplace_back(object);
             }
         }
@@ -181,9 +183,9 @@ public:

     void SyncGuestHost() {
         std::lock_guard lock{mutex};
-        for (const auto& object : marked_for_unregister) {
-            if (object->IsRegistered()) {
-                object->SetSyncPending(false);
+        for (auto& object : marked_for_unregister) {
+            if (object->is_registered) {
+                object->is_sync_pending = false;
                 Unregister(object);
             }
         }
@@ -192,9 +194,9 @@ public:

     void CommitAsyncFlushes() {
         if (uncommitted_flushes) {
-            auto commit_list = std::make_shared<std::list<MapInterval>>();
-            for (auto& map : *uncommitted_flushes) {
-                if (map->IsRegistered() && map->IsModified()) {
+            auto commit_list = std::make_shared<std::list<MapInterval*>>();
+            for (MapInterval* map : *uncommitted_flushes) {
+                if (map->is_registered && map->is_modified) {
                     // TODO(Blinkhawk): Implement backend asynchronous flushing
                     // AsyncFlushMap(map)
                     commit_list->push_back(map);
@@ -228,8 +230,8 @@ public:
             committed_flushes.pop_front();
             return;
         }
-        for (MapInterval& map : *flush_list) {
-            if (map->IsRegistered()) {
+        for (MapInterval* map : *flush_list) {
+            if (map->is_registered) {
                 // TODO(Blinkhawk): Replace this for reading the asynchronous flush
                 FlushMap(map);
             }
@@ -265,61 +267,60 @@ protected:
     }

     /// Register an object into the cache
-    void Register(const MapInterval& new_map, bool inherit_written = false) {
-        const VAddr cpu_addr = new_map->GetStart();
+    MapInterval* Register(MapInterval new_map, bool inherit_written = false) {
+        const VAddr cpu_addr = new_map.start;
         if (!cpu_addr) {
             LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
-                         new_map->GetGpuAddress());
-            return;
+                         new_map.gpu_addr);
+            return nullptr;
         }
-        const std::size_t size = new_map->GetEnd() - new_map->GetStart();
-        new_map->MarkAsRegistered(true);
-        const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
-        mapped_addresses.insert({interval, new_map});
+        const std::size_t size = new_map.end - new_map.start;
+        new_map.is_registered = true;
         rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
-        new_map->SetMemoryMarked(true);
+        new_map.is_memory_marked = true;
         if (inherit_written) {
-            MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
-            new_map->MarkAsWritten(true);
+            MarkRegionAsWritten(new_map.start, new_map.end - 1);
+            new_map.is_written = true;
         }
+        MapInterval* const storage = mapped_addresses_allocator.Allocate();
+        *storage = new_map;
+        mapped_addresses.insert(*storage);
+        return storage;
     }

-    void UnmarkMemory(const MapInterval& map) {
-        if (!map->IsMemoryMarked()) {
+    void UnmarkMemory(MapInterval* map) {
+        if (!map->is_memory_marked) {
             return;
         }
-        const std::size_t size = map->GetEnd() - map->GetStart();
-        rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
-        map->SetMemoryMarked(false);
+        const std::size_t size = map->end - map->start;
+        rasterizer.UpdatePagesCachedCount(map->start, size, -1);
+        map->is_memory_marked = false;
     }

     /// Unregisters an object from the cache
-    void Unregister(const MapInterval& map) {
+    void Unregister(MapInterval* map) {
         UnmarkMemory(map);
-        map->MarkAsRegistered(false);
-        if (map->IsSyncPending()) {
+        map->is_registered = false;
+        if (map->is_sync_pending) {
+            map->is_sync_pending = false;
             marked_for_unregister.remove(map);
-            map->SetSyncPending(false);
         }
-        if (map->IsWritten()) {
-            UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
+        if (map->is_written) {
+            UnmarkRegionAsWritten(map->start, map->end - 1);
         }
-        const IntervalType delete_interval{map->GetStart(), map->GetEnd()};
-        mapped_addresses.erase(delete_interval);
+        const auto it = mapped_addresses.find(*map);
+        ASSERT(it != mapped_addresses.end());
+        mapped_addresses.erase(it);
+        mapped_addresses_allocator.Release(map);
     }

 private:
-    MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) {
-        return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
-    }
-
-    MapInterval MapAddress(const OwnerBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
-                           const std::size_t size) {
-        std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);
+    MapInterval* MapAddress(const OwnerBuffer& block, GPUVAddr gpu_addr, VAddr cpu_addr,
+                            std::size_t size) {
+        const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size);
         if (overlaps.empty()) {
             auto& memory_manager = system.GPU().MemoryManager();
             const VAddr cpu_addr_end = cpu_addr + size;
-            MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr);
             if (memory_manager.IsGranularRange(gpu_addr, size)) {
                 u8* host_ptr = memory_manager.GetPointer(gpu_addr);
                 UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr);
@@ -328,13 +329,12 @@ private:
                 memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size);
                 UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data());
             }
-            Register(new_map);
-            return new_map;
+            return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr));
         }
         const VAddr cpu_addr_end = cpu_addr + size;
         if (overlaps.size() == 1) {
-            MapInterval& current_map = overlaps[0];
+            MapInterval* const current_map = overlaps[0];
             if (current_map->IsInside(cpu_addr, cpu_addr_end)) {
                 return current_map;
             }
@@ -344,35 +344,39 @@ private:
         bool write_inheritance = false;
         bool modified_inheritance = false;
         // Calculate new buffer parameters
-        for (auto& overlap : overlaps) {
-            new_start = std::min(overlap->GetStart(), new_start);
-            new_end = std::max(overlap->GetEnd(), new_end);
-            write_inheritance |= overlap->IsWritten();
-            modified_inheritance |= overlap->IsModified();
+        for (MapInterval* overlap : overlaps) {
+            new_start = std::min(overlap->start, new_start);
+            new_end = std::max(overlap->end, new_end);
+            write_inheritance |= overlap->is_written;
+            modified_inheritance |= overlap->is_modified;
         }
         GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr;
         for (auto& overlap : overlaps) {
             Unregister(overlap);
         }
         UpdateBlock(block, new_start, new_end, overlaps);
-        MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr);
+
+        const MapInterval new_map{new_start, new_end, new_gpu_addr};
+        MapInterval* const map = Register(new_map, write_inheritance);
+        if (!map) {
+            return nullptr;
+        }
         if (modified_inheritance) {
-            new_map->MarkAsModified(true, GetModifiedTicks());
+            map->MarkAsModified(true, GetModifiedTicks());
             if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
-                MarkForAsyncFlush(new_map);
+                MarkForAsyncFlush(map);
             }
         }
-        Register(new_map, write_inheritance);
-        return new_map;
+        return map;
     }

     void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end,
-                     std::vector<MapInterval>& overlaps) {
+                     const VectorMapInterval& overlaps) {
         const IntervalType base_interval{start, end};
         IntervalSet interval_set{};
         interval_set.add(base_interval);
         for (auto& overlap : overlaps) {
-            const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()};
+            const IntervalType subtract{overlap->start, overlap->end};
             interval_set.subtract(subtract);
         }
         for (auto& interval : interval_set) {
@@ -386,18 +390,24 @@ private:
         }
     }

-    std::vector<MapInterval> GetMapsInRange(VAddr addr, std::size_t size) {
+    VectorMapInterval GetMapsInRange(VAddr addr, std::size_t size) {
+        VectorMapInterval result;
         if (size == 0) {
-            return {};
+            return result;
         }
-        std::vector<MapInterval> objects{};
-        const IntervalType interval{addr, addr + size};
-        for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) {
-            objects.push_back(pair.second);
+        const VAddr addr_end = addr + size;
+        auto it = mapped_addresses.lower_bound(addr);
+        if (it != mapped_addresses.begin()) {
+            --it;
         }
-
-        return objects;
+        while (it != mapped_addresses.end() && it->start < addr_end) {
+            if (it->Overlaps(addr, addr_end)) {
+                result.push_back(&*it);
+            }
+            ++it;
+        }
+        return result;
     }

     /// Returns a ticks counter used for tracking when cached objects were last modified
@@ -405,12 +415,12 @@ private:
         return ++modified_ticks;
     }

-    void FlushMap(MapInterval map) {
-        std::size_t size = map->GetEnd() - map->GetStart();
-        OwnerBuffer block = blocks[map->GetStart() >> block_page_bits];
+    void FlushMap(MapInterval* map) {
+        const std::size_t size = map->end - map->start;
+        OwnerBuffer block = blocks[map->start >> block_page_bits];
         staging_buffer.resize(size);
-        DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data());
-        system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size);
+        DownloadBlockData(block, block->GetOffset(map->start), size, staging_buffer.data());
+        system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size);
         map->MarkAsModified(false, 0);
     }

@@ -515,7 +525,7 @@ private:
             } else {
                 written_pages[page_start] = 1;
             }
-            page_start++;
+            ++page_start;
         }
     }

@@ -531,7 +541,7 @@ private:
                     written_pages.erase(it);
                 }
             }
-            page_start++;
+            ++page_start;
         }
     }

@@ -542,14 +552,14 @@ private:
             if (written_pages.count(page_start) > 0) {
                 return true;
             }
-            page_start++;
+            ++page_start;
         }
         return false;
     }

-    void MarkForAsyncFlush(MapInterval& map) {
+    void MarkForAsyncFlush(MapInterval* map) {
         if (!uncommitted_flushes) {
-            uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval>>();
+            uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval*>>();
         }
         uncommitted_flushes->insert(map);
     }

@@ -566,10 +576,9 @@ private:
     u64 buffer_offset = 0;
     u64 buffer_offset_base = 0;

-    using IntervalSet = boost::icl::interval_set<VAddr>;
-    using IntervalCache = boost::icl::interval_map<VAddr, MapInterval>;
-    using IntervalType = typename IntervalCache::interval_type;
-    IntervalCache mapped_addresses;
+    MapIntervalAllocator mapped_addresses_allocator;
+    boost::intrusive::set<MapInterval, boost::intrusive::compare<MapIntervalCompare>>
+        mapped_addresses;

     static constexpr u64 write_page_bit = 11;
     std::unordered_map<u64, u32> written_pages;
@@ -583,10 +592,10 @@ private:
     u64 modified_ticks = 0;

     std::vector<u8> staging_buffer;

-    std::list<MapInterval> marked_for_unregister;
+    std::list<MapInterval*> marked_for_unregister;

-    std::shared_ptr<std::unordered_set<MapInterval>> uncommitted_flushes{};
-    std::list<std::shared_ptr<std::list<MapInterval>>> committed_flushes;
+    std::shared_ptr<std::unordered_set<MapInterval*>> uncommitted_flushes;
+    std::list<std::shared_ptr<std::list<MapInterval*>>> committed_flushes;

     std::recursive_mutex mutex;
 };
diff --git a/src/video_core/buffer_cache/map_interval.cpp b/src/video_core/buffer_cache/map_interval.cpp
new file mode 100644
index 000000000..62587e18a
--- /dev/null
+++ b/src/video_core/buffer_cache/map_interval.cpp
@@ -0,0 +1,33 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+#include <memory>
+
+#include "video_core/buffer_cache/map_interval.h"
+
+namespace VideoCommon {
+
+MapIntervalAllocator::MapIntervalAllocator() {
+    FillFreeList(first_chunk);
+}
+
+MapIntervalAllocator::~MapIntervalAllocator() = default;
+
+void MapIntervalAllocator::AllocateNewChunk() {
+    *new_chunk = std::make_unique<Chunk>();
+    FillFreeList(**new_chunk);
+    new_chunk = &(*new_chunk)->next;
+}
+
+void MapIntervalAllocator::FillFreeList(Chunk& chunk) {
+    const std::size_t old_size = free_list.size();
+    free_list.resize(old_size + chunk.data.size());
+    std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size,
+                   [](MapInterval& interval) { return &interval; });
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
index 29d8b26f3..fe0bcd1d8 100644
--- a/src/video_core/buffer_cache/map_interval.h
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -4,104 +4,89 @@

 #pragma once

+#include <array>
+#include <cstddef>
+#include <memory>
+#include <vector>
+
+#include <boost/intrusive/set_hook.hpp>
+
 #include "common/common_types.h"
 #include "video_core/gpu.h"

 namespace VideoCommon {

-class MapIntervalBase {
-public:
-    MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr)
-        : start{start}, end{end}, gpu_addr{gpu_addr} {}
-
-    void SetCpuAddress(VAddr new_cpu_addr) {
-        cpu_addr = new_cpu_addr;
-    }
-
-    VAddr GetCpuAddress() const {
-        return cpu_addr;
-    }
-
-    GPUVAddr GetGpuAddress() const {
-        return gpu_addr;
-    }
-
-    bool IsInside(const VAddr other_start, const VAddr other_end) const {
-        return (start <= other_start && other_end <= end);
-    }
-
-    bool operator==(const MapIntervalBase& rhs) const {
-        return std::tie(start, end) == std::tie(rhs.start, rhs.end);
-    }
-
-    bool operator!=(const MapIntervalBase& rhs) const {
-        return !operator==(rhs);
-    }
+struct MapInterval : public boost::intrusive::set_base_hook<boost::intrusive::optimize_size<true>> {
+    MapInterval() = default;

-    void MarkAsRegistered(const bool registered) {
-        is_registered = registered;
-    }
+    /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {}

-    bool IsRegistered() const {
-        return is_registered;
-    }
+    explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept
+        : start{start_}, end{end_}, gpu_addr{gpu_addr_} {}

-    void SetMemoryMarked(bool is_memory_marked_) {
-        is_memory_marked = is_memory_marked_;
+    bool IsInside(VAddr other_start, VAddr other_end) const noexcept {
+        return start <= other_start && other_end <= end;
     }

-    bool IsMemoryMarked() const {
-        return is_memory_marked;
+    bool Overlaps(VAddr other_start, VAddr other_end) const noexcept {
+        return start < other_end && other_start < end;
     }

-    void SetSyncPending(bool is_sync_pending_) {
-        is_sync_pending = is_sync_pending_;
-    }
+    void MarkAsModified(bool is_modified_, u64 ticks_) noexcept {
+        is_modified = is_modified_;
+        ticks = ticks_;
+    }
+
+    boost::intrusive::set_member_hook<> member_hook_;
+    VAddr start = 0;
+    VAddr end = 0;
+    GPUVAddr gpu_addr = 0;
+    u64 ticks = 0;
+    bool is_written = false;
+    bool is_modified = false;
+    bool is_registered = false;
+    bool is_memory_marked = false;
+    bool is_sync_pending = false;
+};

-    bool IsSyncPending() const {
-        return is_sync_pending;
+struct MapIntervalCompare {
+    constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept {
+        return lhs.start < rhs.start;
     }
+};

-    VAddr GetStart() const {
-        return start;
-    }
+class MapIntervalAllocator {
+public:
+    MapIntervalAllocator();
+    ~MapIntervalAllocator();

-    VAddr GetEnd() const {
-        return end;
+    MapInterval* Allocate() {
+        if (free_list.empty()) {
+            AllocateNewChunk();
+        }
+        MapInterval* const interval = free_list.back();
+        free_list.pop_back();
+        return interval;
     }

-    void MarkAsModified(const bool is_modified_, const u64 tick) {
-        is_modified = is_modified_;
-        ticks = tick;
+    void Release(MapInterval* interval) {
+        free_list.push_back(interval);
     }

-    bool IsModified() const {
-        return is_modified;
-    }
+private:
+    struct Chunk {
+        std::unique_ptr<Chunk> next;
+        std::array<MapInterval, 0x8000> data;
+    };

-    u64 GetModificationTick() const {
-        return ticks;
-    }
+    void AllocateNewChunk();

-    void MarkAsWritten(const bool is_written_) {
-        is_written = is_written_;
-    }
+    void FillFreeList(Chunk& chunk);

-    bool IsWritten() const {
-        return is_written;
-    }
+    std::vector<MapInterval*> free_list;
+    std::unique_ptr<Chunk>* new_chunk = &first_chunk.next;

-private:
-    VAddr start;
-    VAddr end;
-    GPUVAddr gpu_addr;
-    VAddr cpu_addr{};
-    bool is_written{};
-    bool is_modified{};
-    bool is_registered{};
-    bool is_memory_marked{};
-    bool is_sync_pending{};
-    u64 ticks{};
+    Chunk first_chunk;
 };

 } // namespace VideoCommon
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index d2cab50bd..9964ea894 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -8,6 +8,7 @@
 #include "common/assert.h"
 #include "common/microprofile.h"

+#include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp
index 99ddcb3f8..ec5421afa 100644
--- a/src/video_core/renderer_opengl/gl_fence_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp
@@ -4,6 +4,7 @@

 #include "common/assert.h"

+#include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_fence_manager.h"

 namespace OpenGL {
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 960ebf1a1..c83a08d42 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2309,6 +2309,18 @@ private:
         return {"gl_SubGroupInvocationARB", Type::Uint};
     }

+    template <const std::string_view& comparison>
+    Expression ThreadMask(Operation) {
+        if (device.HasWarpIntrinsics()) {
+            return {fmt::format("gl_Thread{}MaskNV", comparison), Type::Uint};
+        }
+        if (device.HasShaderBallot()) {
+            return {fmt::format("uint(gl_SubGroup{}MaskARB)", comparison), Type::Uint};
+        }
+        LOG_ERROR(Render_OpenGL, "Thread mask intrinsics are required by the shader");
+        return {"0U", Type::Uint};
+    }
+
     Expression ShuffleIndexed(Operation operation) {
         std::string value = VisitOperand(operation, 0).AsFloat();

@@ -2337,6 +2349,12 @@ private:
         static constexpr std::string_view NotEqual = "!=";
         static constexpr std::string_view GreaterEqual = ">=";

+        static constexpr std::string_view Eq = "Eq";
+        static constexpr std::string_view Ge = "Ge";
+        static constexpr std::string_view Gt = "Gt";
+        static constexpr std::string_view Le = "Le";
+        static constexpr std::string_view Lt = "Lt";
+
         static constexpr std::string_view Add = "Add";
         static constexpr std::string_view Min = "Min";
         static constexpr std::string_view Max = "Max";
@@ -2554,6 +2572,11 @@ private:
         &GLSLDecompiler::VoteEqual,

         &GLSLDecompiler::ThreadId,
+        &GLSLDecompiler::ThreadMask<Func::Eq>,
+        &GLSLDecompiler::ThreadMask<Func::Ge>,
+        &GLSLDecompiler::ThreadMask<Func::Gt>,
+        &GLSLDecompiler::ThreadMask<Func::Le>,
+        &GLSLDecompiler::ThreadMask<Func::Lt>,
         &GLSLDecompiler::ShuffleIndexed,

         &GLSLDecompiler::MemoryBarrierGL,
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 5b494da8c..5f33d9e40 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -7,6 +7,7 @@
 #include <memory>

 #include "core/core.h"
+#include "video_core/buffer_cache/buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h
index 04d07fe6a..043fe7947 100644
--- a/src/video_core/renderer_vulkan/vk_fence_manager.h
+++ b/src/video_core/renderer_vulkan/vk_fence_manager.h
@@ -7,6 +7,7 @@
 #include <memory>

 #include "video_core/fence_manager.h"
+#include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/wrapper.h"

 namespace Core {
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 167e20e91..f4ccc9848 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -515,6 +515,16 @@ private:
     void DeclareCommon() {
         thread_id =
             DeclareInputBuiltIn(spv::BuiltIn::SubgroupLocalInvocationId, t_in_uint, "thread_id");
+        thread_masks[0] =
+            DeclareInputBuiltIn(spv::BuiltIn::SubgroupEqMask, t_in_uint4, "thread_eq_mask");
+        thread_masks[1] =
+            DeclareInputBuiltIn(spv::BuiltIn::SubgroupGeMask, t_in_uint4, "thread_ge_mask");
+        thread_masks[2] =
+            DeclareInputBuiltIn(spv::BuiltIn::SubgroupGtMask, t_in_uint4, "thread_gt_mask");
+        thread_masks[3] =
+            DeclareInputBuiltIn(spv::BuiltIn::SubgroupLeMask, t_in_uint4, "thread_le_mask");
+        thread_masks[4] =
+            DeclareInputBuiltIn(spv::BuiltIn::SubgroupLtMask, t_in_uint4, "thread_lt_mask");
     }

     void DeclareVertex() {
@@ -2175,6 +2185,13 @@ private:
         return {OpLoad(t_uint, thread_id), Type::Uint};
     }

+    template <std::size_t index>
+    Expression ThreadMask(Operation) {
+        // TODO(Rodrigo): Handle devices with different warp sizes
+        const Id mask = thread_masks[index];
+        return {OpLoad(t_uint, AccessElement(t_in_uint, mask, 0)), Type::Uint};
+    }
+
     Expression ShuffleIndexed(Operation operation) {
         const Id value = AsFloat(Visit(operation[0]));
         const Id index = AsUint(Visit(operation[1]));
@@ -2639,6 +2656,11 @@ private:
         &SPIRVDecompiler::Vote<&Module::OpSubgroupAllEqualKHR>,

         &SPIRVDecompiler::ThreadId,
+        &SPIRVDecompiler::ThreadMask<0>, // Eq
+        &SPIRVDecompiler::ThreadMask<1>, // Ge
+        &SPIRVDecompiler::ThreadMask<2>, // Gt
+        &SPIRVDecompiler::ThreadMask<3>, // Le
+        &SPIRVDecompiler::ThreadMask<4>, // Lt
         &SPIRVDecompiler::ShuffleIndexed,

         &SPIRVDecompiler::MemoryBarrierGL,
@@ -2763,6 +2785,7 @@ private:
     Id workgroup_id{};
     Id local_invocation_id{};
     Id thread_id{};
+    std::array<Id, 5> thread_masks{}; // eq, ge, gt, le, lt

     VertexIndices in_indices;
     VertexIndices out_indices;
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d4f95b18c..399a455c4 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -109,6 +109,27 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
             return Operation(OperationCode::WorkGroupIdY);
         case SystemVariable::CtaIdZ:
             return Operation(OperationCode::WorkGroupIdZ);
+        case SystemVariable::EqMask:
+        case SystemVariable::LtMask:
+        case SystemVariable::LeMask:
+        case SystemVariable::GtMask:
+        case SystemVariable::GeMask:
+            uses_warps = true;
+            switch (instr.sys20) {
+            case SystemVariable::EqMask:
+                return Operation(OperationCode::ThreadEqMask);
+            case SystemVariable::LtMask:
+                return Operation(OperationCode::ThreadLtMask);
+            case SystemVariable::LeMask:
+                return Operation(OperationCode::ThreadLeMask);
+            case SystemVariable::GtMask:
+                return Operation(OperationCode::ThreadGtMask);
+            case SystemVariable::GeMask:
+                return Operation(OperationCode::ThreadGeMask);
+            default:
+                UNREACHABLE();
+                return Immediate(0u);
+            }
         default:
             UNIMPLEMENTED_MSG("Unhandled system move: {}",
                               static_cast<u32>(instr.sys20.Value()));
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index f75b62240..cce8aeebe 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -226,6 +226,11 @@ enum class OperationCode {
     VoteEqual, /// (bool) -> bool

     ThreadId,       /// () -> uint
+    ThreadEqMask,   /// () -> uint
+    ThreadGeMask,   /// () -> uint
+    ThreadGtMask,   /// () -> uint
+    ThreadLeMask,   /// () -> uint
+    ThreadLtMask,   /// () -> uint
     ShuffleIndexed, /// (uint value, uint index) -> uint

     MemoryBarrierGL, /// () -> void
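
Usage note on the shader changes above: S2R with EqMask/GeMask/GtMask/LeMask/LtMask now decodes to the new ThreadEqMask..ThreadLtMask operations. The GLSL backend lowers them to gl_Thread*MaskNV when NV_shader_thread_group is available, falls back to uint(gl_SubGroup*MaskARB) with ARB_shader_ballot, and otherwise logs an error and emits 0U; the SPIR-V backend loads the low word of the Subgroup*Mask built-ins. The sketch below mirrors that selection logic in isolation; DeviceCaps and EmitThreadMask are hypothetical stand-ins for the decompiler's device queries, not yuzu APIs.

// --- illustrative sketch, not part of the patch ---
#include <iostream>
#include <string>
#include <string_view>

// Hypothetical stand-in for the decompiler's device capability queries
// (the real code asks its Device object for these).
struct DeviceCaps {
    bool has_warp_intrinsics = false; // GL_NV_shader_thread_group
    bool has_shader_ballot = false;   // GL_ARB_shader_ballot
};

// Mirrors the selection logic of GLSLDecompiler::ThreadMask in the hunk above:
// prefer the NV warp intrinsic, fall back to the ARB ballot mask, and emit 0U
// (after reporting an error) when neither extension is available.
std::string EmitThreadMask(const DeviceCaps& device, std::string_view comparison) {
    if (device.has_warp_intrinsics) {
        return "gl_Thread" + std::string{comparison} + "MaskNV";
    }
    if (device.has_shader_ballot) {
        return "uint(gl_SubGroup" + std::string{comparison} + "MaskARB)";
    }
    // The real decompiler goes through LOG_ERROR(Render_OpenGL, ...) here.
    std::cerr << "Thread mask intrinsics are required by the shader\n";
    return "0U";
}

int main() {
    const DeviceCaps nvidia{true, true};
    const DeviceCaps ballot_only{false, true};
    for (const std::string_view comparison : {"Eq", "Ge", "Gt", "Le", "Lt"}) {
        std::cout << comparison << " -> " << EmitThreadMask(nvidia, comparison) << " | "
                  << EmitThreadMask(ballot_only, comparison) << '\n';
    }
}
// --- end of sketch ---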