summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/CMakeLists.txt5
-rw-r--r--src/video_core/buffer_cache.h299
-rw-r--r--src/video_core/buffer_cache/buffer_block.h76
-rw-r--r--src/video_core/buffer_cache/buffer_cache.h447
-rw-r--r--src/video_core/buffer_cache/map_interval.h89
-rw-r--r--src/video_core/engines/fermi_2d.cpp3
-rw-r--r--src/video_core/engines/fermi_2d.h3
-rw-r--r--src/video_core/engines/kepler_memory.cpp2
-rw-r--r--src/video_core/engines/kepler_memory.h1
-rw-r--r--src/video_core/engines/maxwell_3d.cpp22
-rw-r--r--src/video_core/engines/maxwell_3d.h2
-rw-r--r--src/video_core/engines/maxwell_dma.cpp48
-rw-r--r--src/video_core/engines/maxwell_dma.h9
-rw-r--r--src/video_core/engines/shader_bytecode.h25
-rw-r--r--src/video_core/gpu.cpp16
-rw-r--r--src/video_core/gpu.h15
-rw-r--r--src/video_core/gpu_asynch.cpp5
-rw-r--r--src/video_core/gpu_asynch.h5
-rw-r--r--src/video_core/gpu_synch.cpp5
-rw-r--r--src/video_core/gpu_synch.h5
-rw-r--r--src/video_core/gpu_thread.cpp8
-rw-r--r--src/video_core/gpu_thread.h3
-rw-r--r--src/video_core/morton.cpp116
-rw-r--r--src/video_core/morton.h3
-rw-r--r--src/video_core/rasterizer_interface.h2
-rw-r--r--src/video_core/renderer_base.h3
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp52
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.h39
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp46
-rw-r--r--src/video_core/renderer_opengl/gl_device.h11
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp14
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h1
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp955
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h2
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp89
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_shader_decompiler.cpp25
-rw-r--r--src/video_core/shader/decode.cpp1
-rw-r--r--src/video_core/shader/decode/conversion.cpp22
-rw-r--r--src/video_core/shader/decode/float_set.cpp1
-rw-r--r--src/video_core/shader/decode/float_set_predicate.cpp10
-rw-r--r--src/video_core/shader/decode/half_set_predicate.cpp19
-rw-r--r--src/video_core/shader/decode/integer_set.cpp1
-rw-r--r--src/video_core/shader/decode/integer_set_predicate.cpp1
-rw-r--r--src/video_core/shader/decode/other.cpp7
-rw-r--r--src/video_core/shader/decode/predicate_set_register.cpp1
-rw-r--r--src/video_core/shader/decode/warp.cpp55
-rw-r--r--src/video_core/shader/node.h5
-rw-r--r--src/video_core/shader/shader_ir.cpp5
-rw-r--r--src/video_core/shader/shader_ir.h4
-rw-r--r--src/video_core/surface.cpp5
-rw-r--r--src/video_core/texture_cache/surface_params.h1
-rw-r--r--src/video_core/texture_cache/texture_cache.h2
-rw-r--r--src/video_core/textures/decoders.cpp14
-rw-r--r--src/video_core/textures/decoders.h3
-rw-r--r--src/video_core/textures/texture.h2
58 files changed, 1572 insertions, 1051 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 7c18c27b3..e2f85c5f1 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -1,5 +1,7 @@
add_library(video_core STATIC
- buffer_cache.h
+ buffer_cache/buffer_block.h
+ buffer_cache/buffer_cache.h
+ buffer_cache/map_interval.h
dma_pusher.cpp
dma_pusher.h
debug_utils/debug_utils.cpp
@@ -100,6 +102,7 @@ add_library(video_core STATIC
shader/decode/integer_set.cpp
shader/decode/half_set.cpp
shader/decode/video.cpp
+ shader/decode/warp.cpp
shader/decode/xmad.cpp
shader/decode/other.cpp
shader/control_flow.cpp
diff --git a/src/video_core/buffer_cache.h b/src/video_core/buffer_cache.h
deleted file mode 100644
index 6f868b8b4..000000000
--- a/src/video_core/buffer_cache.h
+++ /dev/null
@@ -1,299 +0,0 @@
-// Copyright 2019 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <memory>
-#include <mutex>
-#include <unordered_map>
-#include <unordered_set>
-#include <utility>
-#include <vector>
-
-#include "common/alignment.h"
-#include "common/common_types.h"
-#include "core/core.h"
-#include "video_core/memory_manager.h"
-#include "video_core/rasterizer_cache.h"
-
-namespace VideoCore {
-class RasterizerInterface;
-}
-
-namespace VideoCommon {
-
-template <typename BufferStorageType>
-class CachedBuffer final : public RasterizerCacheObject {
-public:
- explicit CachedBuffer(VAddr cpu_addr, u8* host_ptr)
- : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {}
- ~CachedBuffer() override = default;
-
- VAddr GetCpuAddr() const override {
- return cpu_addr;
- }
-
- std::size_t GetSizeInBytes() const override {
- return size;
- }
-
- u8* GetWritableHostPtr() const {
- return host_ptr;
- }
-
- std::size_t GetSize() const {
- return size;
- }
-
- std::size_t GetCapacity() const {
- return capacity;
- }
-
- bool IsInternalized() const {
- return is_internal;
- }
-
- const BufferStorageType& GetBuffer() const {
- return buffer;
- }
-
- void SetSize(std::size_t new_size) {
- size = new_size;
- }
-
- void SetInternalState(bool is_internal_) {
- is_internal = is_internal_;
- }
-
- BufferStorageType ExchangeBuffer(BufferStorageType buffer_, std::size_t new_capacity) {
- capacity = new_capacity;
- std::swap(buffer, buffer_);
- return buffer_;
- }
-
-private:
- u8* host_ptr{};
- VAddr cpu_addr{};
- std::size_t size{};
- std::size_t capacity{};
- bool is_internal{};
- BufferStorageType buffer;
-};
-
-template <typename BufferStorageType, typename BufferType, typename StreamBuffer>
-class BufferCache : public RasterizerCache<std::shared_ptr<CachedBuffer<BufferStorageType>>> {
-public:
- using Buffer = std::shared_ptr<CachedBuffer<BufferStorageType>>;
- using BufferInfo = std::pair<const BufferType*, u64>;
-
- explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
- std::unique_ptr<StreamBuffer> stream_buffer)
- : RasterizerCache<Buffer>{rasterizer}, system{system},
- stream_buffer{std::move(stream_buffer)}, stream_buffer_handle{
- this->stream_buffer->GetHandle()} {}
- ~BufferCache() = default;
-
- void Unregister(const Buffer& entry) override {
- std::lock_guard lock{RasterizerCache<Buffer>::mutex};
- if (entry->IsInternalized()) {
- internalized_entries.erase(entry->GetCacheAddr());
- }
- ReserveBuffer(entry);
- RasterizerCache<Buffer>::Unregister(entry);
- }
-
- void TickFrame() {
- marked_for_destruction_index =
- (marked_for_destruction_index + 1) % marked_for_destruction_ring_buffer.size();
- MarkedForDestruction().clear();
- }
-
- BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
- bool internalize = false, bool is_written = false) {
- std::lock_guard lock{RasterizerCache<Buffer>::mutex};
-
- auto& memory_manager = system.GPU().MemoryManager();
- const auto host_ptr = memory_manager.GetPointer(gpu_addr);
- if (!host_ptr) {
- return {GetEmptyBuffer(size), 0};
- }
- const auto cache_addr = ToCacheAddr(host_ptr);
-
- // Cache management is a big overhead, so only cache entries with a given size.
- // TODO: Figure out which size is the best for given games.
- constexpr std::size_t max_stream_size = 0x800;
- if (!internalize && size < max_stream_size &&
- internalized_entries.find(cache_addr) == internalized_entries.end()) {
- return StreamBufferUpload(host_ptr, size, alignment);
- }
-
- auto entry = RasterizerCache<Buffer>::TryGet(cache_addr);
- if (!entry) {
- return FixedBufferUpload(gpu_addr, host_ptr, size, internalize, is_written);
- }
-
- if (entry->GetSize() < size) {
- IncreaseBufferSize(entry, size);
- }
- if (is_written) {
- entry->MarkAsModified(true, *this);
- }
- return {ToHandle(entry->GetBuffer()), 0};
- }
-
- /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
- BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
- std::size_t alignment = 4) {
- std::lock_guard lock{RasterizerCache<Buffer>::mutex};
- return StreamBufferUpload(raw_pointer, size, alignment);
- }
-
- void Map(std::size_t max_size) {
- std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
- buffer_offset = buffer_offset_base;
- }
-
- /// Finishes the upload stream, returns true on bindings invalidation.
- bool Unmap() {
- stream_buffer->Unmap(buffer_offset - buffer_offset_base);
- return std::exchange(invalidated, false);
- }
-
- virtual const BufferType* GetEmptyBuffer(std::size_t size) = 0;
-
-protected:
- void FlushObjectInner(const Buffer& entry) override {
- DownloadBufferData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr());
- }
-
- virtual BufferStorageType CreateBuffer(std::size_t size) = 0;
-
- virtual const BufferType* ToHandle(const BufferStorageType& storage) = 0;
-
- virtual void UploadBufferData(const BufferStorageType& buffer, std::size_t offset,
- std::size_t size, const u8* data) = 0;
-
- virtual void DownloadBufferData(const BufferStorageType& buffer, std::size_t offset,
- std::size_t size, u8* data) = 0;
-
- virtual void CopyBufferData(const BufferStorageType& src, const BufferStorageType& dst,
- std::size_t src_offset, std::size_t dst_offset,
- std::size_t size) = 0;
-
-private:
- BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
- std::size_t alignment) {
- AlignBuffer(alignment);
- const std::size_t uploaded_offset = buffer_offset;
- std::memcpy(buffer_ptr, raw_pointer, size);
-
- buffer_ptr += size;
- buffer_offset += size;
- return {&stream_buffer_handle, uploaded_offset};
- }
-
- BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size,
- bool internalize, bool is_written) {
- auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
- const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
- ASSERT(cpu_addr);
-
- auto entry = GetUncachedBuffer(*cpu_addr, host_ptr);
- entry->SetSize(size);
- entry->SetInternalState(internalize);
- RasterizerCache<Buffer>::Register(entry);
-
- if (internalize) {
- internalized_entries.emplace(ToCacheAddr(host_ptr));
- }
- if (is_written) {
- entry->MarkAsModified(true, *this);
- }
-
- if (entry->GetCapacity() < size) {
- MarkedForDestruction().push_back(entry->ExchangeBuffer(CreateBuffer(size), size));
- }
-
- UploadBufferData(entry->GetBuffer(), 0, size, host_ptr);
- return {ToHandle(entry->GetBuffer()), 0};
- }
-
- void IncreaseBufferSize(Buffer& entry, std::size_t new_size) {
- const std::size_t old_size = entry->GetSize();
- if (entry->GetCapacity() < new_size) {
- const auto& old_buffer = entry->GetBuffer();
- auto new_buffer = CreateBuffer(new_size);
-
- // Copy bits from the old buffer to the new buffer.
- CopyBufferData(old_buffer, new_buffer, 0, 0, old_size);
- MarkedForDestruction().push_back(
- entry->ExchangeBuffer(std::move(new_buffer), new_size));
-
- // This buffer could have been used
- invalidated = true;
- }
- // Upload the new bits.
- const std::size_t size_diff = new_size - old_size;
- UploadBufferData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size);
-
- // Update entry's size in the object and in the cache.
- Unregister(entry);
-
- entry->SetSize(new_size);
- RasterizerCache<Buffer>::Register(entry);
- }
-
- Buffer GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) {
- if (auto entry = TryGetReservedBuffer(host_ptr)) {
- return entry;
- }
- return std::make_shared<CachedBuffer<BufferStorageType>>(cpu_addr, host_ptr);
- }
-
- Buffer TryGetReservedBuffer(u8* host_ptr) {
- const auto it = buffer_reserve.find(ToCacheAddr(host_ptr));
- if (it == buffer_reserve.end()) {
- return {};
- }
- auto& reserve = it->second;
- auto entry = reserve.back();
- reserve.pop_back();
- return entry;
- }
-
- void ReserveBuffer(Buffer entry) {
- buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry));
- }
-
- void AlignBuffer(std::size_t alignment) {
- // Align the offset, not the mapped pointer
- const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment);
- buffer_ptr += offset_aligned - buffer_offset;
- buffer_offset = offset_aligned;
- }
-
- std::vector<BufferStorageType>& MarkedForDestruction() {
- return marked_for_destruction_ring_buffer[marked_for_destruction_index];
- }
-
- Core::System& system;
-
- std::unique_ptr<StreamBuffer> stream_buffer;
- BufferType stream_buffer_handle{};
-
- bool invalidated = false;
-
- u8* buffer_ptr = nullptr;
- u64 buffer_offset = 0;
- u64 buffer_offset_base = 0;
-
- std::size_t marked_for_destruction_index = 0;
- std::array<std::vector<BufferStorageType>, 4> marked_for_destruction_ring_buffer;
-
- std::unordered_set<CacheAddr> internalized_entries;
- std::unordered_map<CacheAddr, std::vector<Buffer>> buffer_reserve;
-};
-
-} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h
new file mode 100644
index 000000000..4b9193182
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_block.h
@@ -0,0 +1,76 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <unordered_set>
+#include <utility>
+
+#include "common/alignment.h"
+#include "common/common_types.h"
+#include "video_core/gpu.h"
+
+namespace VideoCommon {
+
+/// Address-range bookkeeping shared by cached buffer blocks: the cache address range
+/// [cache_addr, cache_addr_end), its size in bytes and an epoch stamp.
+/// The constructor and the (non-virtual) destructor are protected, so instances are only
+/// created and destroyed through derived classes.
+class BufferBlock {
+public:
+    /// Returns true when the range [start, end) intersects this block's range.
+    bool Overlaps(const CacheAddr start, const CacheAddr end) const {
+        return (cache_addr < end) && (cache_addr_end > start);
+    }
+
+    /// Returns true when [other_start, other_end] lies entirely within this block's range.
+    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
+        return cache_addr <= other_start && other_end <= cache_addr_end;
+    }
+
+    /// Returns the host pointer obtained from the block's starting cache address.
+    u8* GetWritableHostPtr() const {
+        return FromCacheAddr(cache_addr);
+    }
+
+    /// Returns the host pointer obtained from the cache address `offset` bytes into the block.
+    u8* GetWritableHostPtr(std::size_t offset) const {
+        return FromCacheAddr(cache_addr + offset);
+    }
+
+    /// Returns the distance in bytes from the start of the block to `in_addr`.
+    /// No range check is performed; `in_addr` is expected not to precede the block.
+    std::size_t GetOffset(const CacheAddr in_addr) const {
+        return static_cast<std::size_t>(in_addr - cache_addr);
+    }
+
+    /// Returns the first cache address covered by the block.
+    CacheAddr GetCacheAddr() const {
+        return cache_addr;
+    }
+
+    /// Returns the cache address one past the last byte covered by the block.
+    CacheAddr GetCacheAddrEnd() const {
+        return cache_addr_end;
+    }
+
+    /// Moves the block to `new_addr`, keeping its size and recomputing the end address.
+    void SetCacheAddr(const CacheAddr new_addr) {
+        cache_addr = new_addr;
+        cache_addr_end = new_addr + size;
+    }
+
+    /// Returns the size of the block in bytes.
+    std::size_t GetSize() const {
+        return size;
+    }
+
+    /// Stores the epoch stamp associated with this block.
+    void SetEpoch(u64 new_epoch) {
+        epoch = new_epoch;
+    }
+
+    /// Returns the epoch stamp last stored with SetEpoch (zero if never set).
+    u64 GetEpoch() const {
+        return epoch;
+    }
+
+protected:
+    /// Creates a block of `size` bytes starting at `cache_addr`.
+    explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} {
+        // size must be initialized first: SetCacheAddr derives the end address from it.
+        SetCacheAddr(cache_addr);
+    }
+    ~BufferBlock() = default;
+
+private:
+    CacheAddr cache_addr{};
+    CacheAddr cache_addr_end{};
+    std::size_t size{};
+    u64 epoch{};
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
new file mode 100644
index 000000000..2442ddfd6
--- /dev/null
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -0,0 +1,447 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <algorithm>
+#include <array>
+#include <cstring>
+#include <list>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <tuple>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include <boost/icl/interval_map.hpp>
+#include <boost/icl/interval_set.hpp>
+#include <boost/range/iterator_range.hpp>
+
+#include "common/alignment.h"
+#include "common/common_types.h"
+#include "core/core.h"
+#include "video_core/buffer_cache/buffer_block.h"
+#include "video_core/buffer_cache/map_interval.h"
+#include "video_core/memory_manager.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace VideoCommon {
+
+using MapInterval = std::shared_ptr<MapIntervalBase>;
+
+template <typename TBuffer, typename TBufferType, typename StreamBuffer>
+class BufferCache {
+public:
+ using BufferInfo = std::pair<const TBufferType*, u64>;
+
+    /// Makes `size` bytes of guest memory at `gpu_addr` available in a host buffer.
+    /// Returns the buffer handle together with the offset of the data inside that buffer.
+    /// When `gpu_addr` has no host pointer, the result is {GetEmptyBuffer(size), 0}.
+    /// `is_written` marks the mapped range as modified and as written.
+    BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
+                            bool is_written = false) {
+        std::lock_guard guard{mutex};
+
+        const auto host_ptr = system.GPU().MemoryManager().GetPointer(gpu_addr);
+        if (!host_ptr) {
+            return {GetEmptyBuffer(size), 0};
+        }
+        const auto cache_addr = ToCacheAddr(host_ptr);
+
+        // Cache management is a big overhead, so only cache entries with a given size.
+        // TODO: Figure out which size is the best for given games.
+        constexpr std::size_t max_stream_size = 0x800;
+        const bool is_small = size < max_stream_size;
+        // Small read-only uploads of a region nobody has written go through the stream buffer.
+        if (is_small && !is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) {
+            return StreamBufferUpload(host_ptr, size, alignment);
+        }
+
+        auto block = GetBlock(cache_addr, size);
+        auto map = MapAddress(block, gpu_addr, cache_addr, size);
+        if (!is_written) {
+            // Reading a range that was previously written requires a barrier first.
+            if (map->IsWritten()) {
+                WriteBarrier();
+            }
+        } else {
+            map->MarkAsModified(true, GetModifiedTicks());
+            if (!map->IsWritten()) {
+                map->MarkAsWritten(true);
+                MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
+            }
+        }
+
+        const u64 offset_in_block = static_cast<u64>(block->GetOffset(cache_addr));
+        return {ToHandle(block), offset_in_block};
+    }
+
+ /// Copies `size` bytes from host memory at `raw_pointer` into the stream buffer, after
+ /// aligning the stream offset to `alignment`. Returns the stream buffer handle and the
+ /// offset at which the data was placed. Takes the cache mutex for the duration of the copy.
+ BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
+ std::size_t alignment = 4) {
+ std::lock_guard lock{mutex};
+ return StreamBufferUpload(raw_pointer, size, alignment);
+ }
+
+    /// Maps the stream buffer for up to `max_size` bytes and resets the running upload offset
+    /// to the base offset reported by the mapping.
+    void Map(std::size_t max_size) {
+        std::lock_guard guard{mutex};
+
+        // Alignment requested from the stream buffer for the mapping itself.
+        constexpr std::size_t map_alignment = 4;
+        std::tie(buffer_ptr, buffer_offset_base, invalidated) =
+            stream_buffer->Map(max_size, map_alignment);
+        buffer_offset = buffer_offset_base;
+    }
+
+    /// Ends the current upload stream, reporting how many bytes were consumed since Map().
+    /// Returns true when bindings were invalidated; the flag is cleared by this call.
+    bool Unmap() {
+        std::lock_guard guard{mutex};
+
+        const auto bytes_used = buffer_offset - buffer_offset_base;
+        stream_buffer->Unmap(bytes_used);
+
+        const bool was_invalidated = invalidated;
+        invalidated = false;
+        return was_invalidated;
+    }
+
+    /// Advances the epoch counter and releases, oldest first, every block queued for
+    /// destruction whose stamped epoch is older than the new epoch.
+    void TickFrame() {
+        ++epoch;
+        while (!pending_destruction.empty() &&
+               pending_destruction.front()->GetEpoch() + 1 <= epoch) {
+            pending_destruction.pop_front();
+        }
+    }
+
+    /// Writes every modified, registered map overlapping [addr, addr + size) back to host
+    /// memory, in ascending order of modification tick.
+    void FlushRegion(CacheAddr addr, std::size_t size) {
+        std::lock_guard guard{mutex};
+
+        const auto older_first = [](const MapInterval& lhs, const MapInterval& rhs) {
+            return lhs->GetModificationTick() < rhs->GetModificationTick();
+        };
+
+        std::vector<MapInterval> maps = GetMapsInRange(addr, size);
+        std::sort(maps.begin(), maps.end(), older_first);
+        for (auto& map : maps) {
+            if (!map->IsModified() || !map->IsRegistered()) {
+                continue;
+            }
+            FlushMap(map);
+        }
+    }
+
+    /// Unregisters every registered map overlapping [addr, addr + size).
+    void InvalidateRegion(CacheAddr addr, u64 size) {
+        std::lock_guard guard{mutex};
+
+        std::vector<MapInterval> maps = GetMapsInRange(addr, size);
+        for (MapInterval& map : maps) {
+            if (!map->IsRegistered()) {
+                continue;
+            }
+            Unregister(map);
+        }
+    }
+
+ virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0;
+
+protected:
+ /// Stores the rasterizer and system references, takes ownership of `stream_buffer` and
+ /// caches the handle returned by its GetHandle().
+ /// The handle is read through this->stream_buffer (the member, not the moved-from
+ /// parameter); this relies on stream_buffer being declared before stream_buffer_handle.
+ explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
+ std::unique_ptr<StreamBuffer> stream_buffer)
+ : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)},
+ stream_buffer_handle{this->stream_buffer->GetHandle()} {}
+
+ ~BufferCache() = default;
+
+ virtual const TBufferType* ToHandle(const TBuffer& storage) = 0;
+
+ virtual void WriteBarrier() = 0;
+
+ virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0;
+
+ virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
+ const u8* data) = 0;
+
+ virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size,
+ u8* data) = 0;
+
+ virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset,
+ std::size_t dst_offset, std::size_t size) = 0;
+
+    /// Inserts `new_map` into the interval cache and bumps the rasterizer's cached-page count
+    /// for the CPU range it covers. Logs and returns without registering when the map starts
+    /// at cache address zero or its GPU address does not translate to a CPU address.
+    /// With `inherit_written`, the map and its pages are additionally marked as written.
+    void Register(const MapInterval& new_map, bool inherit_written = false) {
+        const CacheAddr map_start = new_map->GetStart();
+        const std::optional<VAddr> cpu_addr =
+            system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress());
+        const bool is_mapped = map_start && cpu_addr;
+        if (!is_mapped) {
+            LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}",
+                         new_map->GetGpuAddress());
+            return;
+        }
+
+        const CacheAddr map_end = new_map->GetEnd();
+        const std::size_t size = map_end - map_start;
+        new_map->SetCpuAddress(*cpu_addr);
+        new_map->MarkAsRegistered(true);
+
+        const IntervalType interval{map_start, map_end};
+        mapped_addresses.insert({interval, new_map});
+        rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
+
+        if (!inherit_written) {
+            return;
+        }
+        MarkRegionAsWritten(map_start, map_end - 1);
+        new_map->MarkAsWritten(true);
+    }
+
+    /// Removes `map` from the interval cache, decrements the rasterizer's cached-page count
+    /// for its CPU range and, if the map was written, releases its written-page marks.
+    void Unregister(MapInterval& map) {
+        const CacheAddr map_start = map->GetStart();
+        const CacheAddr map_end = map->GetEnd();
+        const std::size_t size = map_end - map_start;
+
+        rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1);
+        map->MarkAsRegistered(false);
+        if (map->IsWritten()) {
+            UnmarkRegionAsWritten(map_start, map_end - 1);
+        }
+
+        const IntervalType stale_interval{map_start, map_end};
+        mapped_addresses.erase(stale_interval);
+    }
+
+private:
+ /// Allocates a new, unregistered map covering the cache range [start, end) that
+ /// corresponds to guest GPU address `gpu_addr`.
+ MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) {
+ return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
+ }
+
+    /// Returns a registered map covering [cache_addr, cache_addr + size) inside `block`.
+    /// - No overlapping map: the range is uploaded to the block and a new map is registered.
+    /// - Exactly one overlapping map that already contains the range: that map is returned.
+    /// - Otherwise: all overlapping maps are unregistered and replaced by one map spanning
+    ///   their union with the requested range; only the parts not covered by the old maps
+    ///   are uploaded, and the written/modified state of the old maps is inherited.
+    MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr,
+                           const CacheAddr cache_addr, const std::size_t size) {
+        const CacheAddr cache_addr_end = cache_addr + size;
+
+        std::vector<MapInterval> overlaps = GetMapsInRange(cache_addr, size);
+        if (overlaps.empty()) {
+            MapInterval fresh_map = CreateMap(cache_addr, cache_addr_end, gpu_addr);
+            u8* source = FromCacheAddr(cache_addr);
+            UploadBlockData(block, block->GetOffset(cache_addr), size, source);
+            Register(fresh_map);
+            return fresh_map;
+        }
+
+        if (overlaps.size() == 1 && overlaps.front()->IsInside(cache_addr, cache_addr_end)) {
+            return overlaps.front();
+        }
+
+        // Compute the union of the request with every overlapping map and collect the
+        // state the merged map has to inherit.
+        CacheAddr merged_start = cache_addr;
+        CacheAddr merged_end = cache_addr_end;
+        bool any_written = false;
+        bool any_modified = false;
+        for (auto& overlap : overlaps) {
+            merged_start = std::min(overlap->GetStart(), merged_start);
+            merged_end = std::max(overlap->GetEnd(), merged_end);
+            any_written |= overlap->IsWritten();
+            any_modified |= overlap->IsModified();
+        }
+
+        // Shift the GPU address by the same amount the start of the range moved.
+        GPUVAddr merged_gpu_addr = gpu_addr + merged_start - cache_addr;
+        for (auto& overlap : overlaps) {
+            Unregister(overlap);
+        }
+        UpdateBlock(block, merged_start, merged_end, overlaps);
+
+        MapInterval merged_map = CreateMap(merged_start, merged_end, merged_gpu_addr);
+        if (any_modified) {
+            merged_map->MarkAsModified(true, GetModifiedTicks());
+        }
+        Register(merged_map, any_written);
+        return merged_map;
+    }
+
+    /// Uploads to `block` every part of [start, end) that is not covered by any of the maps
+    /// in `overlaps`.
+    void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end,
+                     std::vector<MapInterval>& overlaps) {
+        // Start from the full range and carve out what the existing maps already cover.
+        IntervalSet uncovered{};
+        const IntervalType full_range{start, end};
+        uncovered.add(full_range);
+        for (auto& overlap : overlaps) {
+            const IntervalType covered{overlap->GetStart(), overlap->GetEnd()};
+            uncovered.subtract(covered);
+        }
+
+        for (auto& gap : uncovered) {
+            std::size_t gap_size = gap.upper() - gap.lower();
+            if (gap_size == 0) {
+                continue;
+            }
+            u8* source = FromCacheAddr(gap.lower());
+            UploadBlockData(block, block->GetOffset(gap.lower()), gap_size, source);
+        }
+    }
+
+    /// Collects the maps stored in the interval cache that intersect [addr, addr + size).
+    /// A zero `size` yields an empty result.
+    std::vector<MapInterval> GetMapsInRange(CacheAddr addr, std::size_t size) {
+        std::vector<MapInterval> found{};
+        if (size != 0) {
+            const IntervalType query{addr, addr + size};
+            const auto range = mapped_addresses.equal_range(query);
+            for (auto it = range.first; it != range.second; ++it) {
+                found.push_back(it->second);
+            }
+        }
+        return found;
+    }
+
+ /// Returns a ticks counter used for tracking when cached objects were last modified.
+ /// Every call increments the counter first, so returned values are strictly increasing
+ /// and start at 1.
+ u64 GetModifiedTicks() {
+ return ++modified_ticks;
+ }
+
+    /// Downloads the range covered by `map` from the block that holds its first page into
+    /// host memory, then clears the map's modified flag and tick.
+    void FlushMap(MapInterval map) {
+        const CacheAddr map_start = map->GetStart();
+        std::size_t size = map->GetEnd() - map_start;
+        // Blocks are indexed by page number of the cache address.
+        TBuffer block = blocks[map_start >> block_page_bits];
+        u8* destination = FromCacheAddr(map_start);
+        DownloadBlockData(block, block->GetOffset(map_start), size, destination);
+        map->MarkAsModified(false, 0);
+    }
+
+    /// Aligns the stream offset to `alignment`, copies `size` bytes from `raw_pointer` into
+    /// the mapped stream buffer and advances the write cursor past them.
+    /// Returns the stream buffer handle and the offset at which the copy begins.
+    BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size,
+                                  std::size_t alignment) {
+        AlignBuffer(alignment);
+        // Remember where this upload begins before the cursor moves.
+        const std::size_t start_offset = buffer_offset;
+
+        std::memcpy(buffer_ptr, raw_pointer, size);
+        buffer_offset += size;
+        buffer_ptr += size;
+
+        return {&stream_buffer_handle, start_offset};
+    }
+
+    /// Rounds the stream offset up to a multiple of `alignment` and moves the mapped write
+    /// pointer forward by the same number of bytes.
+    void AlignBuffer(std::size_t alignment) {
+        // Align the offset, not the mapped pointer
+        const std::size_t aligned_offset = Common::AlignUp(buffer_offset, alignment);
+        const auto padding = aligned_offset - buffer_offset;
+        buffer_ptr += padding;
+        buffer_offset = aligned_offset;
+    }
+
+    /// Replaces `buffer` with a block that starts at the same address and is one block page
+    /// larger. The old contents are copied over, the old block is stamped with the current
+    /// epoch and queued for destruction, and every page of the new range is pointed at the
+    /// new block. Returns the new block.
+    TBuffer EnlargeBlock(TBuffer buffer) {
+        const CacheAddr base_addr = buffer->GetCacheAddr();
+        const std::size_t previous_size = buffer->GetSize();
+        const std::size_t grown_size = previous_size + block_page_size;
+
+        TBuffer grown = CreateBlock(base_addr, grown_size);
+        CopyBlock(buffer, grown, 0, 0, previous_size);
+
+        buffer->SetEpoch(epoch);
+        pending_destruction.push_back(buffer);
+
+        const CacheAddr last_addr = base_addr + grown_size - 1;
+        const u64 first_page = base_addr >> block_page_bits;
+        const u64 last_page = last_addr >> block_page_bits;
+        for (u64 page = first_page; page <= last_page; ++page) {
+            blocks[page] = grown;
+        }
+        return grown;
+    }
+
+    /// Creates one block starting at the lower of the two blocks' addresses and sized as the
+    /// sum of their sizes, copies both blocks into it at their respective offsets, queues
+    /// the two old blocks for destruction (stamped with the current epoch) and points every
+    /// page of the new range at the new block. Returns the new block.
+    TBuffer MergeBlocks(TBuffer first, TBuffer second) {
+        const CacheAddr first_addr = first->GetCacheAddr();
+        const CacheAddr second_addr = second->GetCacheAddr();
+        const std::size_t first_size = first->GetSize();
+        const std::size_t second_size = second->GetSize();
+
+        const CacheAddr merged_addr = std::min(first_addr, second_addr);
+        const std::size_t merged_size = first_size + second_size;
+        TBuffer merged = CreateBlock(merged_addr, merged_size);
+        CopyBlock(first, merged, 0, merged->GetOffset(first_addr), first_size);
+        CopyBlock(second, merged, 0, merged->GetOffset(second_addr), second_size);
+
+        first->SetEpoch(epoch);
+        second->SetEpoch(epoch);
+        pending_destruction.push_back(first);
+        pending_destruction.push_back(second);
+
+        const CacheAddr last_addr = merged_addr + merged_size - 1;
+        const u64 first_page = merged_addr >> block_page_bits;
+        const u64 last_page = last_addr >> block_page_bits;
+        for (u64 page = first_page; page <= last_page; ++page) {
+            blocks[page] = merged;
+        }
+        return merged;
+    }
+
+    /// Returns a single block covering every block page touched by
+    /// [cache_addr, cache_addr + size). Pages are visited in ascending order: a page without
+    /// a block either creates the first block or enlarges the one found so far, and a page
+    /// owned by a different block causes the two blocks to be merged.
+    TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) {
+        const CacheAddr last_addr = cache_addr + size - 1;
+        const u64 first_page = cache_addr >> block_page_bits;
+        const u64 last_page = last_addr >> block_page_bits;
+
+        TBuffer found{};
+        for (u64 page = first_page; page <= last_page; ++page) {
+            auto it = blocks.find(page);
+            if (it == blocks.end()) {
+                if (found) {
+                    found = EnlargeBlock(found);
+                    continue;
+                }
+                const CacheAddr page_addr = (page << block_page_bits);
+                found = CreateBlock(page_addr, block_page_size);
+                blocks[page] = found;
+                continue;
+            }
+            if (!found) {
+                found = it->second;
+                continue;
+            }
+            if (found == it->second) {
+                // This page already belongs to the block being assembled.
+                continue;
+            }
+            found = MergeBlocks(found, it->second);
+        }
+        return found;
+    }
+
+    /// Increments the written counter of every write page touched by the inclusive address
+    /// range [start, end]; pages seen for the first time start at a count of one.
+    void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
+        const u64 first_page = start >> write_page_bit;
+        const u64 last_page = end >> write_page_bit;
+        for (u64 page = first_page; page <= last_page; ++page) {
+            // operator[] value-initializes a missing counter to zero before the increment.
+            ++written_pages[page];
+        }
+    }
+
+    /// Decrements the written counter of every write page touched by the inclusive address
+    /// range [start, end], dropping pages whose counter would reach zero. Pages without a
+    /// counter are ignored.
+    void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) {
+        const u64 first_page = start >> write_page_bit;
+        const u64 last_page = end >> write_page_bit;
+        for (u64 page = first_page; page <= last_page; page++) {
+            auto it = written_pages.find(page);
+            if (it == written_pages.end()) {
+                continue;
+            }
+            if (it->second > 1) {
+                it->second = it->second - 1;
+                continue;
+            }
+            written_pages.erase(it);
+        }
+    }
+
+    /// Returns true when at least one write page touched by the inclusive address range
+    /// [start, end] currently has a written counter.
+    bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const {
+        const u64 first_page = start >> write_page_bit;
+        const u64 last_page = end >> write_page_bit;
+        for (u64 page = first_page; page <= last_page; page++) {
+            if (written_pages.find(page) != written_pages.end()) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+ VideoCore::RasterizerInterface& rasterizer;
+ Core::System& system;
+ std::unique_ptr<StreamBuffer> stream_buffer;
+
+ TBufferType stream_buffer_handle{};
+
+ bool invalidated = false;
+
+ u8* buffer_ptr = nullptr;
+ u64 buffer_offset = 0;
+ u64 buffer_offset_base = 0;
+
+ using IntervalSet = boost::icl::interval_set<CacheAddr>;
+ using IntervalCache = boost::icl::interval_map<CacheAddr, MapInterval>;
+ using IntervalType = typename IntervalCache::interval_type;
+ IntervalCache mapped_addresses{};
+
+ static constexpr u64 write_page_bit{11};
+ std::unordered_map<u64, u32> written_pages{};
+
+ static constexpr u64 block_page_bits{21};
+ static constexpr u64 block_page_size{1 << block_page_bits};
+ std::unordered_map<u64, TBuffer> blocks{};
+
+ std::list<TBuffer> pending_destruction{};
+ u64 epoch{};
+ u64 modified_ticks{};
+
+ std::recursive_mutex mutex;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h
new file mode 100644
index 000000000..3a104d5cd
--- /dev/null
+++ b/src/video_core/buffer_cache/map_interval.h
@@ -0,0 +1,89 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "common/common_types.h"
+#include "video_core/gpu.h"
+
+namespace VideoCommon {
+
+/// Describes one mapped range of the buffer cache: the cache address range [start, end),
+/// the GPU and CPU addresses associated with it, and its written / modified / registered
+/// state together with the tick of its last modification.
+class MapIntervalBase {
+public:
+    /// Creates an unregistered, unmodified, unwritten map for [start, end) at `gpu_addr`.
+    /// The CPU address is zero until SetCpuAddress is called.
+    MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr)
+        : start{start}, end{end}, gpu_addr{gpu_addr} {}
+
+    /// Stores the CPU address associated with the map.
+    void SetCpuAddress(VAddr new_cpu_addr) {
+        cpu_addr = new_cpu_addr;
+    }
+
+    /// Returns the CPU address last stored with SetCpuAddress.
+    VAddr GetCpuAddress() const {
+        return cpu_addr;
+    }
+
+    /// Returns the GPU address given at construction.
+    GPUVAddr GetGpuAddress() const {
+        return gpu_addr;
+    }
+
+    /// Returns true when [other_start, other_end] lies entirely within this map's range.
+    bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const {
+        return (start <= other_start && other_end <= end);
+    }
+
+    /// Two maps compare equal when they cover the same cache address range; addresses and
+    /// state flags are not taken into account.
+    bool operator==(const MapIntervalBase& rhs) const {
+        // Compared member by member so this header does not depend on <tuple>.
+        return start == rhs.start && end == rhs.end;
+    }
+
+    bool operator!=(const MapIntervalBase& rhs) const {
+        return !operator==(rhs);
+    }
+
+    /// Sets whether the map is currently registered.
+    void MarkAsRegistered(const bool registered) {
+        is_registered = registered;
+    }
+
+    /// Returns the flag last set with MarkAsRegistered.
+    bool IsRegistered() const {
+        return is_registered;
+    }
+
+    /// Returns the first cache address of the range.
+    CacheAddr GetStart() const {
+        return start;
+    }
+
+    /// Returns the cache address one past the end of the range.
+    CacheAddr GetEnd() const {
+        return end;
+    }
+
+    /// Sets the modified flag and records `tick` as the modification tick.
+    void MarkAsModified(const bool is_modified_, const u64 tick) {
+        is_modified = is_modified_;
+        ticks = tick;
+    }
+
+    /// Returns the flag last set with MarkAsModified.
+    bool IsModified() const {
+        return is_modified;
+    }
+
+    /// Returns the tick recorded by the last MarkAsModified call.
+    u64 GetModificationTick() const {
+        return ticks;
+    }
+
+    /// Sets whether the map has been written.
+    void MarkAsWritten(const bool is_written_) {
+        is_written = is_written_;
+    }
+
+    /// Returns the flag last set with MarkAsWritten.
+    bool IsWritten() const {
+        return is_written;
+    }
+
+private:
+    CacheAddr start;
+    CacheAddr end;
+    GPUVAddr gpu_addr;
+    VAddr cpu_addr{};
+    bool is_written{};
+    bool is_modified{};
+    bool is_registered{};
+    u64 ticks{};
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 0ee228e28..98a8b5337 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -10,8 +10,7 @@
namespace Tegra::Engines {
-Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager)
- : rasterizer{rasterizer}, memory_manager{memory_manager} {}
+Fermi2D::Fermi2D(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
ASSERT_MSG(method_call.method < Regs::NUM_REGS,
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 05421d185..0901cf2fa 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -33,7 +33,7 @@ namespace Tegra::Engines {
class Fermi2D final {
public:
- explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer, MemoryManager& memory_manager);
+ explicit Fermi2D(VideoCore::RasterizerInterface& rasterizer);
~Fermi2D() = default;
/// Write the value to the register identified by method.
@@ -145,7 +145,6 @@ public:
private:
VideoCore::RasterizerInterface& rasterizer;
- MemoryManager& memory_manager;
/// Performs the copy from the source surface to the destination surface as configured in the
/// registers.
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 44279de00..fa4a7c5c1 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -15,7 +15,7 @@
namespace Tegra::Engines {
KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager)
- : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {}
+ : system{system}, upload_state{memory_manager, regs.upload} {}
KeplerMemory::~KeplerMemory() = default;
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index f3bc675a9..e0e25c321 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -65,7 +65,6 @@ public:
private:
Core::System& system;
- MemoryManager& memory_manager;
Upload::State upload_state;
};
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 125c53360..f5158d219 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -249,16 +249,10 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
executing_macro = 0;
// Lookup the macro offset
- const u32 entry{(method - MacroRegistersStart) >> 1};
- const auto& search{macro_offsets.find(entry)};
- if (search == macro_offsets.end()) {
- LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method);
- UNREACHABLE();
- return;
- }
+ const u32 entry = ((method - MacroRegistersStart) >> 1) % macro_positions.size();
// Execute the current macro.
- macro_interpreter.Execute(search->second, std::move(parameters));
+ macro_interpreter.Execute(macro_positions[entry], std::move(parameters));
}
void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
@@ -421,7 +415,7 @@ void Maxwell3D::ProcessMacroUpload(u32 data) {
}
void Maxwell3D::ProcessMacroBind(u32 data) {
- macro_offsets[regs.macros.entry] = data;
+ macro_positions[regs.macros.entry++] = data;
}
void Maxwell3D::ProcessQueryGet() {
@@ -524,7 +518,7 @@ void Maxwell3D::ProcessQueryCondition() {
void Maxwell3D::ProcessSyncPoint() {
const u32 sync_point = regs.sync_info.sync_point.Value();
const u32 increment = regs.sync_info.increment.Value();
- const u32 cache_flush = regs.sync_info.unknown.Value();
+ [[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
if (increment) {
system.GPU().IncrementSyncPoint(sync_point);
}
@@ -626,10 +620,10 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
Texture::TICEntry tic_entry;
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
- const auto r_type{tic_entry.r_type.Value()};
- const auto g_type{tic_entry.g_type.Value()};
- const auto b_type{tic_entry.b_type.Value()};
- const auto a_type{tic_entry.a_type.Value()};
+ [[maybe_unused]] const auto r_type{tic_entry.r_type.Value()};
+ [[maybe_unused]] const auto g_type{tic_entry.g_type.Value()};
+ [[maybe_unused]] const auto b_type{tic_entry.b_type.Value()};
+ [[maybe_unused]] const auto a_type{tic_entry.a_type.Value()};
// TODO(Subv): Different data types for separate components are not supported
DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 1ee982b76..0184342a0 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1270,7 +1270,7 @@ private:
MemoryManager& memory_manager;
/// Start offsets of each macro in macro_memory
- std::unordered_map<u32, u32> macro_offsets;
+ std::array<u32, 0x80> macro_positions = {};
/// Memory for macro code
MacroMemory macro_memory;
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index a28c04473..ad8453c5f 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -5,18 +5,17 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
+#include "core/settings.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h"
#include "video_core/memory_manager.h"
-#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Engines {
-MaxwellDMA::MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- MemoryManager& memory_manager)
- : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {}
+MaxwellDMA::MaxwellDMA(Core::System& system, MemoryManager& memory_manager)
+ : system{system}, memory_manager{memory_manager} {}
void MaxwellDMA::CallMethod(const GPU::MethodCall& method_call) {
ASSERT_MSG(method_call.method < Regs::NUM_REGS,
@@ -84,13 +83,17 @@ void MaxwellDMA::HandleCopy() {
ASSERT(regs.exec.enable_2d == 1);
if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) {
- ASSERT(regs.src_params.size_z == 1);
+ ASSERT(regs.src_params.BlockDepth() == 0);
// If the input is tiled and the output is linear, deswizzle the input and copy it over.
- const u32 src_bytes_per_pixel = regs.src_pitch / regs.src_params.size_x;
+ const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count;
const std::size_t src_size = Texture::CalculateSize(
- true, src_bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
+ true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y,
regs.src_params.size_z, regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
+ const std::size_t src_layer_size = Texture::CalculateSize(
+ true, bytes_per_pixel, regs.src_params.size_x, regs.src_params.size_y, 1,
+ regs.src_params.BlockHeight(), regs.src_params.BlockDepth());
+
const std::size_t dst_size = regs.dst_pitch * regs.y_count;
if (read_buffer.size() < src_size) {
@@ -104,23 +107,23 @@ void MaxwellDMA::HandleCopy() {
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
- Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch,
- regs.src_params.size_x, src_bytes_per_pixel, read_buffer.data(),
- write_buffer.data(), regs.src_params.BlockHeight(),
- regs.src_params.pos_x, regs.src_params.pos_y);
+ Texture::UnswizzleSubrect(
+ regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel,
+ read_buffer.data() + src_layer_size * regs.src_params.pos_z, write_buffer.data(),
+ regs.src_params.BlockHeight(), regs.src_params.pos_x, regs.src_params.pos_y);
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
} else {
ASSERT(regs.dst_params.BlockDepth() == 0);
- const u32 src_bytes_per_pixel = regs.src_pitch / regs.x_count;
+ const u32 bytes_per_pixel = regs.src_pitch / regs.x_count;
const std::size_t dst_size = Texture::CalculateSize(
- true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
+ true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y,
regs.dst_params.size_z, regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
const std::size_t dst_layer_size = Texture::CalculateSize(
- true, src_bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
+ true, bytes_per_pixel, regs.dst_params.size_x, regs.dst_params.size_y, 1,
regs.dst_params.BlockHeight(), regs.dst_params.BlockDepth());
const std::size_t src_size = regs.src_pitch * regs.y_count;
@@ -133,14 +136,19 @@ void MaxwellDMA::HandleCopy() {
write_buffer.resize(dst_size);
}
- memory_manager.ReadBlock(source, read_buffer.data(), src_size);
- memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
+ if (Settings::values.use_accurate_gpu_emulation) {
+ memory_manager.ReadBlock(source, read_buffer.data(), src_size);
+ memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
+ } else {
+ memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size);
+ memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
+ }
// If the input is linear and the output is tiled, swizzle the input and copy it over.
- Texture::SwizzleSubrect(regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x,
- src_bytes_per_pixel,
- write_buffer.data() + dst_layer_size * regs.dst_params.pos_z,
- read_buffer.data(), regs.dst_params.BlockHeight());
+ Texture::SwizzleSubrect(
+ regs.x_count, regs.y_count, regs.src_pitch, regs.dst_params.size_x, bytes_per_pixel,
+ write_buffer.data() + dst_layer_size * regs.dst_params.pos_z, read_buffer.data(),
+ regs.dst_params.BlockHeight(), regs.dst_params.pos_x, regs.dst_params.pos_y);
memory_manager.WriteBlock(dest, write_buffer.data(), dst_size);
}
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 17b015ca7..93808a9bb 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -20,10 +20,6 @@ namespace Tegra {
class MemoryManager;
}
-namespace VideoCore {
-class RasterizerInterface;
-}
-
namespace Tegra::Engines {
/**
@@ -33,8 +29,7 @@ namespace Tegra::Engines {
class MaxwellDMA final {
public:
- explicit MaxwellDMA(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- MemoryManager& memory_manager);
+ explicit MaxwellDMA(Core::System& system, MemoryManager& memory_manager);
~MaxwellDMA() = default;
/// Write the value to the register identified by method.
@@ -180,8 +175,6 @@ public:
private:
Core::System& system;
- VideoCore::RasterizerInterface& rasterizer;
-
MemoryManager& memory_manager;
std::vector<u8> read_buffer;
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index aaa1acea9..c3678b9ea 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -538,6 +538,12 @@ enum class PhysicalAttributeDirection : u64 {
Output = 1,
};
+enum class VoteOperation : u64 {
+ All = 0, // allThreadsNV
+ Any = 1, // anyThreadNV
+ Eq = 2, // allThreadsEqualNV
+};
+
union Instruction {
Instruction& operator=(const Instruction& instr) {
value = instr.value;
@@ -565,6 +571,13 @@ union Instruction {
} nop;
union {
+ BitField<48, 2, VoteOperation> operation;
+ BitField<45, 3, u64> dest_pred;
+ BitField<39, 3, u64> value;
+ BitField<42, 1, u64> negate_value;
+ } vote;
+
+ union {
BitField<8, 8, Register> gpr;
BitField<20, 24, s64> offset;
} gmem;
@@ -873,6 +886,7 @@ union Instruction {
union {
BitField<0, 3, u64> pred0;
BitField<3, 3, u64> pred3;
+ BitField<6, 1, u64> neg_b;
BitField<7, 1, u64> abs_a;
BitField<39, 3, u64> pred39;
BitField<42, 1, u64> neg_pred;
@@ -1006,7 +1020,6 @@ union Instruction {
} iset;
union {
- BitField<41, 2, u64> selector; // i2i and i2f only
BitField<45, 1, u64> negate_a;
BitField<49, 1, u64> abs_a;
BitField<10, 2, Register::Size> src_size;
@@ -1032,6 +1045,13 @@ union Instruction {
}
} f2f;
+ union {
+ BitField<41, 2, u64> selector;
+ } int_src;
+
+ union {
+ BitField<41, 1, u64> selector;
+ } float_src;
} conversion;
union {
@@ -1487,6 +1507,7 @@ public:
SYNC,
BRK,
DEPBAR,
+ VOTE,
BFE_C,
BFE_R,
BFE_IMM,
@@ -1649,6 +1670,7 @@ public:
Hfma2,
Flow,
Synch,
+ Warp,
Memory,
Texture,
Image,
@@ -1775,6 +1797,7 @@ private:
INST("111000110100---", Id::BRK, Type::Flow, "BRK"),
INST("111000110000----", Id::EXIT, Type::Flow, "EXIT"),
INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
+ INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index c409af194..2c47541cb 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -17,27 +17,15 @@
namespace Tegra {
-u32 FramebufferConfig::BytesPerPixel(PixelFormat format) {
- switch (format) {
- case PixelFormat::ABGR8:
- case PixelFormat::BGRA8:
- return 4;
- default:
- return 4;
- }
-
- UNREACHABLE();
-}
-
GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
: system{system}, renderer{renderer}, is_async{is_async} {
auto& rasterizer{renderer.Rasterizer()};
memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer);
dma_pusher = std::make_unique<Tegra::DmaPusher>(*this);
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager);
- fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager);
+ fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer);
kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager);
- maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager);
+ maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
}
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 11857ff99..78bc0601a 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -19,6 +19,10 @@ inline CacheAddr ToCacheAddr(const void* host_ptr) {
return reinterpret_cast<CacheAddr>(host_ptr);
}
+inline u8* FromCacheAddr(CacheAddr cache_addr) {
+ return reinterpret_cast<u8*>(cache_addr);
+}
+
namespace Core {
class System;
}
@@ -91,14 +95,10 @@ class DebugContext;
struct FramebufferConfig {
enum class PixelFormat : u32 {
ABGR8 = 1,
+ RGB565 = 4,
BGRA8 = 5,
};
- /**
- * Returns the number of bytes per pixel.
- */
- static u32 BytesPerPixel(PixelFormat format);
-
VAddr address;
u32 offset;
u32 width;
@@ -249,8 +249,7 @@ public:
virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
/// Swap buffers (render frame)
- virtual void SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
+ virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
virtual void FlushRegion(CacheAddr addr, u64 size) = 0;
@@ -281,8 +280,8 @@ private:
protected:
std::unique_ptr<Tegra::DmaPusher> dma_pusher;
- VideoCore::RendererBase& renderer;
Core::System& system;
+ VideoCore::RendererBase& renderer;
private:
std::unique_ptr<Tegra::MemoryManager> memory_manager;
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index ea67be831..f2a3a390e 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -23,9 +23,8 @@ void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
gpu_thread.SubmitList(std::move(entries));
}
-void GPUAsynch::SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
- gpu_thread.SwapBuffers(std::move(framebuffer));
+void GPUAsynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
+ gpu_thread.SwapBuffers(framebuffer);
}
void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) {
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 36377d677..a12f9bac4 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -14,15 +14,14 @@ class RendererBase;
namespace VideoCommon {
/// Implementation of GPU interface that runs the GPU asynchronously
-class GPUAsynch : public Tegra::GPU {
+class GPUAsynch final : public Tegra::GPU {
public:
explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer);
~GPUAsynch() override;
void Start() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
- void SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
+ void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index d4ead9c47..d48221077 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -19,9 +19,8 @@ void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
dma_pusher->DispatchCalls();
}
-void GPUSynch::SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
- renderer.SwapBuffers(std::move(framebuffer));
+void GPUSynch::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
+ renderer.SwapBuffers(framebuffer);
}
void GPUSynch::FlushRegion(CacheAddr addr, u64 size) {
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 07bcc47f1..5eb1c461c 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -13,15 +13,14 @@ class RendererBase;
namespace VideoCommon {
/// Implementation of GPU interface that runs the GPU synchronously
-class GPUSynch : public Tegra::GPU {
+class GPUSynch final : public Tegra::GPU {
public:
explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer);
~GPUSynch() override;
void Start() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
- void SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
+ void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(CacheAddr addr, u64 size) override;
void InvalidateRegion(CacheAddr addr, u64 size) override;
void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index b441e92b0..5f039e4fd 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -39,7 +39,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p
dma_pusher.Push(std::move(submit_list->entries));
dma_pusher.DispatchCalls();
} else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
- renderer.SwapBuffers(std::move(data->framebuffer));
+ renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
renderer.Rasterizer().FlushRegion(data->addr, data->size);
} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
@@ -78,9 +78,9 @@ void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence);
}
-void ThreadManager::SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
- PushCommand(SwapBuffersCommand(std::move(framebuffer)));
+void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
+ PushCommand(SwapBuffersCommand(framebuffer ? *framebuffer
+ : std::optional<const Tegra::FramebufferConfig>{}));
}
void ThreadManager::FlushRegion(CacheAddr addr, u64 size) {
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 1d9d0c39e..3ae0ec9f3 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -110,8 +110,7 @@ public:
void SubmitList(Tegra::CommandList&& entries);
/// Swap buffers (render frame)
- void SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
+ void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
void FlushRegion(CacheAddr addr, u64 size);
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index 3e91cbc83..084f85e67 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -25,8 +25,8 @@ static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth
// With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
// pixel values.
- const u32 tile_size_x{GetDefaultBlockWidth(format)};
- const u32 tile_size_y{GetDefaultBlockHeight(format)};
+ constexpr u32 tile_size_x{GetDefaultBlockWidth(format)};
+ constexpr u32 tile_size_y{GetDefaultBlockHeight(format)};
if constexpr (morton_to_linear) {
Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
@@ -186,99 +186,6 @@ static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFor
return morton_to_linear_fns[static_cast<std::size_t>(format)];
}
-static u32 MortonInterleave128(u32 x, u32 y) {
- // 128x128 Z-Order coordinate from 2D coordinates
- static constexpr u32 xlut[] = {
- 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042,
- 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809,
- 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000,
- 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043,
- 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a,
- 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001,
- 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048,
- 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b,
- 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002,
- 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049,
- 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840,
- 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003,
- 0x0008, 0x0009, 0x000a, 0x000b, 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a,
- 0x004b, 0x0800, 0x0801, 0x0802, 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841,
- 0x0842, 0x0843, 0x0848, 0x0849, 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008,
- 0x1009, 0x100a, 0x100b, 0x1040, 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b,
- 0x1800, 0x1801, 0x1802, 0x1803, 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842,
- 0x1843, 0x1848, 0x1849, 0x184a, 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009,
- 0x200a, 0x200b, 0x2040, 0x2041, 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800,
- 0x2801, 0x2802, 0x2803, 0x2808, 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843,
- 0x2848, 0x2849, 0x284a, 0x284b, 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a,
- 0x300b, 0x3040, 0x3041, 0x3042, 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801,
- 0x3802, 0x3803, 0x3808, 0x3809, 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848,
- 0x3849, 0x384a, 0x384b, 0x0000, 0x0001, 0x0002, 0x0003, 0x0008, 0x0009, 0x000a, 0x000b,
- 0x0040, 0x0041, 0x0042, 0x0043, 0x0048, 0x0049, 0x004a, 0x004b, 0x0800, 0x0801, 0x0802,
- 0x0803, 0x0808, 0x0809, 0x080a, 0x080b, 0x0840, 0x0841, 0x0842, 0x0843, 0x0848, 0x0849,
- 0x084a, 0x084b, 0x1000, 0x1001, 0x1002, 0x1003, 0x1008, 0x1009, 0x100a, 0x100b, 0x1040,
- 0x1041, 0x1042, 0x1043, 0x1048, 0x1049, 0x104a, 0x104b, 0x1800, 0x1801, 0x1802, 0x1803,
- 0x1808, 0x1809, 0x180a, 0x180b, 0x1840, 0x1841, 0x1842, 0x1843, 0x1848, 0x1849, 0x184a,
- 0x184b, 0x2000, 0x2001, 0x2002, 0x2003, 0x2008, 0x2009, 0x200a, 0x200b, 0x2040, 0x2041,
- 0x2042, 0x2043, 0x2048, 0x2049, 0x204a, 0x204b, 0x2800, 0x2801, 0x2802, 0x2803, 0x2808,
- 0x2809, 0x280a, 0x280b, 0x2840, 0x2841, 0x2842, 0x2843, 0x2848, 0x2849, 0x284a, 0x284b,
- 0x3000, 0x3001, 0x3002, 0x3003, 0x3008, 0x3009, 0x300a, 0x300b, 0x3040, 0x3041, 0x3042,
- 0x3043, 0x3048, 0x3049, 0x304a, 0x304b, 0x3800, 0x3801, 0x3802, 0x3803, 0x3808, 0x3809,
- 0x380a, 0x380b, 0x3840, 0x3841, 0x3842, 0x3843, 0x3848, 0x3849, 0x384a, 0x384b,
- };
- static constexpr u32 ylut[] = {
- 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090,
- 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124,
- 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200,
- 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294,
- 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330,
- 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404,
- 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0,
- 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534,
- 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610,
- 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4,
- 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780,
- 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014,
- 0x0020, 0x0024, 0x0030, 0x0034, 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0,
- 0x00b4, 0x0100, 0x0104, 0x0110, 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184,
- 0x0190, 0x0194, 0x01a0, 0x01a4, 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220,
- 0x0224, 0x0230, 0x0234, 0x0280, 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4,
- 0x0300, 0x0304, 0x0310, 0x0314, 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390,
- 0x0394, 0x03a0, 0x03a4, 0x03b0, 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424,
- 0x0430, 0x0434, 0x0480, 0x0484, 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500,
- 0x0504, 0x0510, 0x0514, 0x0520, 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594,
- 0x05a0, 0x05a4, 0x05b0, 0x05b4, 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630,
- 0x0634, 0x0680, 0x0684, 0x0690, 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704,
- 0x0710, 0x0714, 0x0720, 0x0724, 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0,
- 0x07a4, 0x07b0, 0x07b4, 0x0000, 0x0004, 0x0010, 0x0014, 0x0020, 0x0024, 0x0030, 0x0034,
- 0x0080, 0x0084, 0x0090, 0x0094, 0x00a0, 0x00a4, 0x00b0, 0x00b4, 0x0100, 0x0104, 0x0110,
- 0x0114, 0x0120, 0x0124, 0x0130, 0x0134, 0x0180, 0x0184, 0x0190, 0x0194, 0x01a0, 0x01a4,
- 0x01b0, 0x01b4, 0x0200, 0x0204, 0x0210, 0x0214, 0x0220, 0x0224, 0x0230, 0x0234, 0x0280,
- 0x0284, 0x0290, 0x0294, 0x02a0, 0x02a4, 0x02b0, 0x02b4, 0x0300, 0x0304, 0x0310, 0x0314,
- 0x0320, 0x0324, 0x0330, 0x0334, 0x0380, 0x0384, 0x0390, 0x0394, 0x03a0, 0x03a4, 0x03b0,
- 0x03b4, 0x0400, 0x0404, 0x0410, 0x0414, 0x0420, 0x0424, 0x0430, 0x0434, 0x0480, 0x0484,
- 0x0490, 0x0494, 0x04a0, 0x04a4, 0x04b0, 0x04b4, 0x0500, 0x0504, 0x0510, 0x0514, 0x0520,
- 0x0524, 0x0530, 0x0534, 0x0580, 0x0584, 0x0590, 0x0594, 0x05a0, 0x05a4, 0x05b0, 0x05b4,
- 0x0600, 0x0604, 0x0610, 0x0614, 0x0620, 0x0624, 0x0630, 0x0634, 0x0680, 0x0684, 0x0690,
- 0x0694, 0x06a0, 0x06a4, 0x06b0, 0x06b4, 0x0700, 0x0704, 0x0710, 0x0714, 0x0720, 0x0724,
- 0x0730, 0x0734, 0x0780, 0x0784, 0x0790, 0x0794, 0x07a0, 0x07a4, 0x07b0, 0x07b4,
- };
- return xlut[x % 128] + ylut[y % 128];
-}
-
-static u32 GetMortonOffset128(u32 x, u32 y, u32 bytes_per_pixel) {
- // Calculates the offset of the position of the pixel in Morton order
- // Framebuffer images are split into 128x128 tiles.
-
- constexpr u32 block_height = 128;
- const u32 coarse_x = x & ~127;
-
- const u32 i = MortonInterleave128(x, y);
-
- const u32 offset = coarse_x * block_height;
-
- return (i + offset) * bytes_per_pixel;
-}
-
void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
u8* buffer, u8* addr) {
@@ -286,23 +193,4 @@ void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stri
tile_width_spacing, buffer, addr);
}
-void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
- u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data) {
- const bool morton_to_linear = mode == MortonSwizzleMode::MortonToLinear;
- u8* data_ptrs[2];
- for (u32 y = 0; y < height; ++y) {
- for (u32 x = 0; x < width; ++x) {
- const u32 coarse_y = y & ~127;
- const u32 morton_offset =
- GetMortonOffset128(x, y, bytes_per_pixel) + coarse_y * width * bytes_per_pixel;
- const u32 linear_pixel_index = (x + y * width) * linear_bytes_per_pixel;
-
- data_ptrs[morton_to_linear ? 1 : 0] = morton_data + morton_offset;
- data_ptrs[morton_to_linear ? 0 : 1] = &linear_data[linear_pixel_index];
-
- std::memcpy(data_ptrs[0], data_ptrs[1], bytes_per_pixel);
- }
- }
-}
-
} // namespace VideoCore
diff --git a/src/video_core/morton.h b/src/video_core/morton.h
index ee5b45555..b714a7e3f 100644
--- a/src/video_core/morton.h
+++ b/src/video_core/morton.h
@@ -15,7 +15,4 @@ void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat forma
u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
u8* buffer, u8* addr);
-void MortonCopyPixels128(MortonSwizzleMode mode, u32 width, u32 height, u32 bytes_per_pixel,
- u32 linear_bytes_per_pixel, u8* morton_data, u8* linear_data);
-
} // namespace VideoCore
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 6e44d51cf..6b3f2d50a 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -50,7 +50,7 @@ public:
/// and invalidated
virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
- // Notify the rasterizer to send all written commands to the host GPU.
+ /// Notify the rasterizer to send all written commands to the host GPU.
virtual void FlushCommands() = 0;
/// Notify rasterizer that a frame is about to finish
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 1d54c3723..af1bebc4f 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -36,8 +36,7 @@ public:
virtual ~RendererBase();
/// Swap buffers (render frame)
- virtual void SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0;
+ virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
/// Initialize the renderer
virtual bool Init() = 0;
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 2a9b523f5..f8a807c84 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -7,28 +7,41 @@
#include <glad/glad.h>
#include "common/assert.h"
+#include "common/microprofile.h"
+#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
+MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
+
+CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size)
+ : VideoCommon::BufferBlock{cache_addr, size} {
+ gl_buffer.Create();
+ glNamedBufferData(gl_buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
+}
+
+CachedBufferBlock::~CachedBufferBlock() = default;
+
OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
std::size_t stream_size)
- : VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer>{
+ : VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>{
rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {}
OGLBufferCache::~OGLBufferCache() = default;
-OGLBuffer OGLBufferCache::CreateBuffer(std::size_t size) {
- OGLBuffer buffer;
- buffer.Create();
- glNamedBufferData(buffer.handle, static_cast<GLsizeiptr>(size), nullptr, GL_DYNAMIC_DRAW);
- return buffer;
+Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
+ return std::make_shared<CachedBufferBlock>(cache_addr, size);
+}
+
+void OGLBufferCache::WriteBarrier() {
+ glMemoryBarrier(GL_ALL_BARRIER_BITS);
}
-const GLuint* OGLBufferCache::ToHandle(const OGLBuffer& buffer) {
- return &buffer.handle;
+const GLuint* OGLBufferCache::ToHandle(const Buffer& buffer) {
+ return buffer->GetHandle();
}
const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) {
@@ -36,23 +49,24 @@ const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) {
return &null_buffer;
}
-void OGLBufferCache::UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size,
- const u8* data) {
- glNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
+void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+ const u8* data) {
+ glNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(size), data);
}
-void OGLBufferCache::DownloadBufferData(const OGLBuffer& buffer, std::size_t offset,
- std::size_t size, u8* data) {
- glGetNamedBufferSubData(buffer.handle, static_cast<GLintptr>(offset),
+void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+ u8* data) {
+ MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
+ glGetNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(size), data);
}
-void OGLBufferCache::CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst,
- std::size_t src_offset, std::size_t dst_offset,
- std::size_t size) {
- glCopyNamedBufferSubData(src.handle, dst.handle, static_cast<GLintptr>(src_offset),
- static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
+void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
+ std::size_t dst_offset, std::size_t size) {
+ glCopyNamedBufferSubData(*src->GetHandle(), *dst->GetHandle(),
+ static_cast<GLintptr>(src_offset), static_cast<GLintptr>(dst_offset),
+ static_cast<GLsizeiptr>(size));
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 8c8ac4038..022e7bfa9 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -7,7 +7,7 @@
#include <memory>
#include "common/common_types.h"
-#include "video_core/buffer_cache.h"
+#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
@@ -21,7 +21,24 @@ namespace OpenGL {
class OGLStreamBuffer;
class RasterizerOpenGL;
-class OGLBufferCache final : public VideoCommon::BufferCache<OGLBuffer, GLuint, OGLStreamBuffer> {
+class CachedBufferBlock;
+
+using Buffer = std::shared_ptr<CachedBufferBlock>;
+
+class CachedBufferBlock : public VideoCommon::BufferBlock {
+public:
+ explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size);
+ ~CachedBufferBlock();
+
+ const GLuint* GetHandle() const {
+ return &gl_buffer.handle;
+ }
+
+private:
+ OGLBuffer gl_buffer{};
+};
+
+class OGLBufferCache final : public VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer> {
public:
explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
std::size_t stream_size);
@@ -30,18 +47,20 @@ public:
const GLuint* GetEmptyBuffer(std::size_t) override;
protected:
- OGLBuffer CreateBuffer(std::size_t size) override;
+ Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
+
+ void WriteBarrier() override;
- const GLuint* ToHandle(const OGLBuffer& buffer) override;
+ const GLuint* ToHandle(const Buffer& buffer) override;
- void UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size,
- const u8* data) override;
+ void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+ const u8* data) override;
- void DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size,
- u8* data) override;
+ void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
+ u8* data) override;
- void CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, std::size_t src_offset,
- std::size_t dst_offset, std::size_t size) override;
+ void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
+ std::size_t dst_offset, std::size_t size) override;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 85424a4c9..4f59a87b4 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -14,12 +14,22 @@
namespace OpenGL {
namespace {
+
template <typename T>
T GetInteger(GLenum pname) {
GLint temporary;
glGetIntegerv(pname, &temporary);
return static_cast<T>(temporary);
}
+
+/// Builds `glsl` as a single-stage vertex program and reports whether it linked.
+/// The temporary program object is deleted before returning.
+/// @param glsl Null-terminated GLSL source to build.
+/// @return true when the program's GL_LINK_STATUS is GL_TRUE.
+bool TestProgram(const GLchar* glsl) {
+    const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &glsl)};
+    // Value-initialize the result: when glGetProgramiv raises an error (for example because
+    // program creation failed and `shader` is 0) it does not write to its output, and the
+    // comparison below would otherwise read an indeterminate value.
+    GLint link_status{};
+    glGetProgramiv(shader, GL_LINK_STATUS, &link_status);
+    glDeleteProgram(shader);
+    return link_status == GL_TRUE;
+}
+
} // Anonymous namespace
Device::Device() {
@@ -27,42 +37,41 @@ Device::Device() {
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
+ has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
+ GLAD_GL_NV_shader_thread_shuffle;
has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
has_variable_aoffi = TestVariableAoffi();
has_component_indexing_bug = TestComponentIndexingBug();
+ has_precise_bug = TestPreciseBug();
+
+ LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
+ LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
+ LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
}
Device::Device(std::nullptr_t) {
uniform_buffer_alignment = 0;
max_vertex_attributes = 16;
max_varyings = 15;
+ has_warp_intrinsics = true;
has_vertex_viewport_layer = true;
has_variable_aoffi = true;
has_component_indexing_bug = false;
+ has_precise_bug = false;
}
bool Device::TestVariableAoffi() {
- const GLchar* AOFFI_TEST = R"(#version 430 core
+ return TestProgram(R"(#version 430 core
// This is a unit test, please ignore me on apitrace bug reports.
uniform sampler2D tex;
uniform ivec2 variable_offset;
out vec4 output_attribute;
void main() {
output_attribute = textureOffset(tex, vec2(0), variable_offset);
-}
-)";
- const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &AOFFI_TEST)};
- GLint link_status{};
- glGetProgramiv(shader, GL_LINK_STATUS, &link_status);
- glDeleteProgram(shader);
-
- const bool supported{link_status == GL_TRUE};
- LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", supported);
- return supported;
+})");
}
bool Device::TestComponentIndexingBug() {
- constexpr char log_message[] = "Renderer_ComponentIndexingBug: {}";
const GLchar* COMPONENT_TEST = R"(#version 430 core
layout (std430, binding = 0) buffer OutputBuffer {
uint output_value;
@@ -102,12 +111,21 @@ void main() {
GLuint result;
glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result);
if (result != values.at(index)) {
- LOG_INFO(Render_OpenGL, log_message, true);
return true;
}
}
- LOG_INFO(Render_OpenGL, log_message, false);
return false;
}
+bool Device::TestPreciseBug() {
+ return !TestProgram(R"(#version 430 core
+in vec3 coords;
+out float out_value;
+uniform sampler2DShadow tex;
+void main() {
+ precise float tmp_value = vec4(texture(tex, coords)).x;
+ out_value = tmp_value;
+})");
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index dc883722d..ba6dcd3be 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -30,6 +30,10 @@ public:
return max_varyings;
}
+ bool HasWarpIntrinsics() const {
+ return has_warp_intrinsics;
+ }
+
bool HasVertexViewportLayer() const {
return has_vertex_viewport_layer;
}
@@ -42,17 +46,24 @@ public:
return has_component_indexing_bug;
}
+ bool HasPreciseBug() const {
+ return has_precise_bug;
+ }
+
private:
static bool TestVariableAoffi();
static bool TestComponentIndexingBug();
+ static bool TestPreciseBug();
std::size_t uniform_buffer_alignment{};
std::size_t shader_storage_alignment{};
u32 max_vertex_attributes{};
u32 max_varyings{};
+ bool has_warp_intrinsics{};
bool has_vertex_viewport_layer{};
bool has_variable_aoffi{};
bool has_component_indexing_bug{};
+ bool has_precise_bug{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 80cfda7e4..bb09ecd52 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -708,8 +708,6 @@ void RasterizerOpenGL::DrawArrays() {
return;
}
- const auto& regs = gpu.regs;
-
SyncColorMask();
SyncFragmentColorClampState();
SyncMultiSampleState();
@@ -980,7 +978,7 @@ void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entr
GPUVAddr gpu_addr, std::size_t size) {
const auto alignment{device.GetShaderStorageBufferAlignment()};
const auto [ssbo, buffer_offset] =
- buffer_cache.UploadMemory(gpu_addr, size, alignment, true, entry.IsWritten());
+ buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten());
bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 1c90facc3..cf6a5cddf 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -212,7 +212,9 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
const auto texture_buffer_usage{variant.texture_buffer_usage};
std::string source = "#version 430 core\n"
- "#extension GL_ARB_separate_shader_objects : enable\n";
+ "#extension GL_ARB_separate_shader_objects : enable\n"
+ "#extension GL_NV_gpu_shader5 : enable\n"
+ "#extension GL_NV_shader_thread_group : enable\n";
if (entries.shader_viewport_layer_array) {
source += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
}
@@ -247,20 +249,24 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
if (!texture_buffer_usage.test(i)) {
continue;
}
- source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i);
+ source += fmt::format("#define SAMPLER_{}_IS_BUFFER\n", i);
+ }
+ if (texture_buffer_usage.any()) {
+ source += '\n';
}
if (program_type == ProgramType::Geometry) {
const auto [glsl_topology, debug_name, max_vertices] =
GetPrimitiveDescription(primitive_mode);
- source += "layout (" + std::string(glsl_topology) + ") in;\n";
+ source += "layout (" + std::string(glsl_topology) + ") in;\n\n";
source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n';
}
if (program_type == ProgramType::Compute) {
source += "layout (local_size_variable) in;\n";
}
+ source += '\n';
source += code;
OGLShader shader;
@@ -289,7 +295,7 @@ std::set<GLenum> GetSupportedFormats() {
CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type,
GLShader::ProgramResult result)
- : RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr},
+ : RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr},
unique_identifier{params.unique_identifier}, program_type{program_type},
disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs},
entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index a3106a0ff..2c8faf855 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -106,7 +106,6 @@ private:
ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const;
- u8* host_ptr{};
VAddr cpu_addr{};
u64 unique_identifier{};
ProgramType program_type{};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index d8f722c26..a5cc1a86f 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -39,7 +39,7 @@ using namespace VideoCommon::Shader;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using Operation = const OperationNode&;
-enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
+enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
struct TextureAoffi {};
using TextureArgument = std::pair<Type, Node>;
@@ -48,7 +48,7 @@ using TextureIR = std::variant<TextureAoffi, TextureArgument>;
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
-class ShaderWriter {
+class ShaderWriter final {
public:
void AddExpression(std::string_view text) {
DEBUG_ASSERT(scope >= 0);
@@ -93,9 +93,157 @@ private:
u32 temporary_index = 1;
};
+/// A piece of generated shader source text paired with the Type it evaluates to.
+/// A default-constructed Expression has Type::Void and empty code; it stands for a node that
+/// yields no value. The As*() accessors return the code, wrapped in conversion text when the
+/// stored type differs from the requested one.
+class Expression final {
+public:
+    /// Creates a typed expression. Type::Void is asserted against; use the default constructor
+    /// for void results.
+    Expression(std::string code, Type type) : code{std::move(code)}, type{type} {
+        ASSERT(type != Type::Void);
+    }
+    /// Creates a void expression with no code.
+    Expression() : type{Type::Void} {}
+
+    /// Returns the type this expression was created with.
+    Type GetType() const {
+        return type;
+    }
+
+    /// Returns the raw code without any conversion applied.
+    std::string GetCode() const {
+        return code;
+    }
+
+    /// Asserts that this expression is void.
+    void CheckVoid() const {
+        ASSERT(type == Type::Void);
+    }
+
+    /// Returns the code converted to the requested type by dispatching to the matching As*()
+    /// accessor. Requesting Type::Void (or any unknown value) is reported as unreachable and
+    /// the raw code is returned.
+    std::string As(Type type) const {
+        switch (type) {
+        case Type::Bool:
+            return AsBool();
+        case Type::Bool2:
+            return AsBool2();
+        case Type::Float:
+            return AsFloat();
+        case Type::Int:
+            return AsInt();
+        case Type::Uint:
+            return AsUint();
+        case Type::HalfFloat:
+            return AsHalfFloat();
+        default:
+            UNREACHABLE_MSG("Invalid type");
+            return code;
+        }
+    }
+
+    /// Returns the code as a bool. Only Type::Bool expressions are accepted; there is no
+    /// conversion from other types.
+    std::string AsBool() const {
+        switch (type) {
+        case Type::Bool:
+            return code;
+        default:
+            UNREACHABLE_MSG("Incompatible types");
+            return code;
+        }
+    }
+
+    /// Returns the code as a bvec2. Only Type::Bool2 expressions are accepted.
+    std::string AsBool2() const {
+        switch (type) {
+        case Type::Bool2:
+            return code;
+        default:
+            UNREACHABLE_MSG("Incompatible types");
+            return code;
+        }
+    }
+
+    /// Returns the code as a float, wrapping it in the utof/itof helpers (and packHalf2x16 for
+    /// half floats) when the stored type is not already Float.
+    std::string AsFloat() const {
+        switch (type) {
+        case Type::Float:
+            return code;
+        case Type::Uint:
+            return fmt::format("utof({})", code);
+        case Type::Int:
+            return fmt::format("itof({})", code);
+        case Type::HalfFloat:
+            return fmt::format("utof(packHalf2x16({}))", code);
+        default:
+            UNREACHABLE_MSG("Incompatible types");
+            return code;
+        }
+    }
+
+    /// Returns the code as an int, wrapping it in ftoi, an int() constructor or packHalf2x16
+    /// depending on the stored type.
+    std::string AsInt() const {
+        switch (type) {
+        case Type::Float:
+            return fmt::format("ftoi({})", code);
+        case Type::Uint:
+            return fmt::format("int({})", code);
+        case Type::Int:
+            return code;
+        case Type::HalfFloat:
+            return fmt::format("int(packHalf2x16({}))", code);
+        default:
+            UNREACHABLE_MSG("Incompatible types");
+            return code;
+        }
+    }
+
+    /// Returns the code as a uint, wrapping it in ftou, a uint() constructor or packHalf2x16
+    /// depending on the stored type.
+    std::string AsUint() const {
+        switch (type) {
+        case Type::Float:
+            return fmt::format("ftou({})", code);
+        case Type::Uint:
+            return code;
+        case Type::Int:
+            return fmt::format("uint({})", code);
+        case Type::HalfFloat:
+            return fmt::format("packHalf2x16({})", code);
+        default:
+            UNREACHABLE_MSG("Incompatible types");
+            return code;
+        }
+    }
+
+    /// Returns the code as a half float pair (vec2) by passing its bits through
+    /// unpackHalf2x16 when the stored type is not already HalfFloat.
+    std::string AsHalfFloat() const {
+        switch (type) {
+        case Type::Float:
+            return fmt::format("unpackHalf2x16(ftou({}))", code);
+        case Type::Uint:
+            return fmt::format("unpackHalf2x16({})", code);
+        case Type::Int:
+            // unpackHalf2x16 takes a uint; convert explicitly instead of emitting a no-op
+            // int() cast and relying on implicit int -> uint conversion.
+            return fmt::format("unpackHalf2x16(uint({}))", code);
+        case Type::HalfFloat:
+            return code;
+        default:
+            UNREACHABLE_MSG("Incompatible types");
+            return code;
+        }
+    }
+
+private:
+    std::string code;
+    Type type{};
+};
+
+constexpr const char* GetTypeString(Type type) {
+ switch (type) {
+ case Type::Bool:
+ return "bool";
+ case Type::Bool2:
+ return "bvec2";
+ case Type::Float:
+ return "float";
+ case Type::Int:
+ return "int";
+ case Type::Uint:
+ return "uint";
+ case Type::HalfFloat:
+ return "vec2";
+ default:
+ UNREACHABLE_MSG("Invalid type");
+ return "<invalid type>";
+ }
+}
+
/// Generates code to use for a swizzle operation.
constexpr const char* GetSwizzle(u32 element) {
- constexpr std::array<const char*, 4> swizzle = {".x", ".y", ".z", ".w"};
+ constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
return swizzle.at(element);
}
@@ -134,8 +282,8 @@ constexpr bool IsGenericAttribute(Attribute::Index index) {
return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
}
-constexpr Attribute::Index ToGenericAttribute(u32 value) {
- return static_cast<Attribute::Index>(value + static_cast<u32>(Attribute::Index::Attribute_0));
+constexpr Attribute::Index ToGenericAttribute(u64 value) {
+ return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0));
}
u32 GetGenericAttributeIndex(Attribute::Index index) {
@@ -191,7 +339,7 @@ public:
// VM's program counter
const auto first_address = ir.GetBasicBlocks().begin()->first;
- code.AddLine("uint jmp_to = {}u;", first_address);
+ code.AddLine("uint jmp_to = {}U;", first_address);
// TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
// unlikely that shaders will use 20 nested SSYs and PBKs.
@@ -199,7 +347,7 @@ public:
constexpr u32 FLOW_STACK_SIZE = 20;
for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
- code.AddLine("uint {} = 0u;", FlowStackTopName(stack));
+ code.AddLine("uint {} = 0U;", FlowStackTopName(stack));
}
}
@@ -210,7 +358,7 @@ public:
for (const auto& pair : ir.GetBasicBlocks()) {
const auto [address, bb] = pair;
- code.AddLine("case 0x{:x}u: {{", address);
+ code.AddLine("case 0x{:X}U: {{", address);
++code.scope;
VisitBlock(bb);
@@ -322,7 +470,7 @@ private:
void DeclareRegisters() {
const auto& registers = ir.GetRegisters();
for (const u32 gpr : registers) {
- code.AddLine("float {} = 0;", GetRegister(gpr));
+ code.AddLine("float {} = 0.0f;", GetRegister(gpr));
}
if (!registers.empty()) {
code.AddNewLine();
@@ -348,7 +496,7 @@ private:
return;
}
const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
- code.AddLine("float {}[{}];", GetLocalMemory(), element_count);
+ code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
code.AddNewLine();
}
@@ -371,8 +519,6 @@ private:
return "noperspective ";
default:
case AttributeUse::Unused:
- UNREACHABLE_MSG("Unused attribute being fetched");
- return {};
UNIMPLEMENTED_MSG("Unknown attribute usage index={}", static_cast<u32>(attribute));
return {};
}
@@ -449,7 +595,7 @@ private:
const auto [index, size] = entry;
code.AddLine("layout (std140, binding = CBUF_BINDING_{}) uniform {} {{", index,
GetConstBufferBlock(index));
- code.AddLine(" vec4 {}[MAX_CONSTBUFFER_ELEMENTS];", GetConstBuffer(index));
+ code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS);
code.AddLine("}};");
code.AddNewLine();
}
@@ -470,7 +616,7 @@ private:
code.AddLine("layout (std430, binding = GMEM_BINDING_{}_{}) {} buffer {} {{",
base.cbuf_index, base.cbuf_offset, qualifier, GetGlobalMemoryBlock(base));
- code.AddLine(" float {}[];", GetGlobalMemory(base));
+ code.AddLine(" uint {}[];", GetGlobalMemory(base));
code.AddLine("}};");
code.AddNewLine();
}
@@ -528,7 +674,7 @@ private:
if (!ir.HasPhysicalAttributes()) {
return;
}
- code.AddLine("float readPhysicalAttribute(uint physical_address) {{");
+ code.AddLine("float ReadPhysicalAttribute(uint physical_address) {{");
++code.scope;
code.AddLine("switch (physical_address) {{");
@@ -537,15 +683,16 @@ private:
for (u32 index = 0; index < num_attributes; ++index) {
const auto attribute{ToGenericAttribute(index)};
for (u32 element = 0; element < 4; ++element) {
- constexpr u32 generic_base{0x80};
- constexpr u32 generic_stride{16};
- constexpr u32 element_stride{4};
+ constexpr u32 generic_base = 0x80;
+ constexpr u32 generic_stride = 16;
+ constexpr u32 element_stride = 4;
const u32 address{generic_base + index * generic_stride + element * element_stride};
- const bool declared{stage != ProgramType::Fragment ||
- header.ps.GetAttributeUse(index) != AttributeUse::Unused};
- const std::string value{declared ? ReadAttribute(attribute, element) : "0"};
- code.AddLine("case 0x{:x}: return {};", address, value);
+ const bool declared = stage != ProgramType::Fragment ||
+ header.ps.GetAttributeUse(index) != AttributeUse::Unused;
+ const std::string value =
+ declared ? ReadAttribute(attribute, element).AsFloat() : "0.0f";
+ code.AddLine("case 0x{:X}U: return {};", address, value);
}
}
@@ -565,7 +712,7 @@ private:
case Tegra::Shader::ImageType::Texture1D:
return "image1D";
case Tegra::Shader::ImageType::TextureBuffer:
- return "bufferImage";
+ return "imageBuffer";
case Tegra::Shader::ImageType::Texture1DArray:
return "image1DArray";
case Tegra::Shader::ImageType::Texture2D:
@@ -590,13 +737,11 @@ private:
void VisitBlock(const NodeBlock& bb) {
for (const auto& node : bb) {
- if (const std::string expr = Visit(node); !expr.empty()) {
- code.AddLine(expr);
- }
+ Visit(node).CheckVoid();
}
}
- std::string Visit(const Node& node) {
+ Expression Visit(const Node& node) {
if (const auto operation = std::get_if<OperationNode>(&*node)) {
const auto operation_index = static_cast<std::size_t>(operation->GetCode());
if (operation_index >= operation_decompilers.size()) {
@@ -614,18 +759,18 @@ private:
if (const auto gpr = std::get_if<GprNode>(&*node)) {
const u32 index = gpr->GetIndex();
if (index == Register::ZeroIndex) {
- return "0";
+ return {"0U", Type::Uint};
}
- return GetRegister(index);
+ return {GetRegister(index), Type::Float};
}
if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
const u32 value = immediate->GetValue();
if (value < 10) {
// For eyecandy avoid using hex numbers on single digits
- return fmt::format("utof({}u)", immediate->GetValue());
+ return {fmt::format("{}U", immediate->GetValue()), Type::Uint};
}
- return fmt::format("utof(0x{:x}u)", immediate->GetValue());
+ return {fmt::format("0x{:X}U", immediate->GetValue()), Type::Uint};
}
if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
@@ -640,17 +785,18 @@ private:
}
}();
if (predicate->IsNegated()) {
- return fmt::format("!({})", value);
+ return {fmt::format("!({})", value), Type::Bool};
}
- return value;
+ return {value, Type::Bool};
}
if (const auto abuf = std::get_if<AbufNode>(&*node)) {
UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry,
"Physical attributes in geometry shaders are not implemented");
if (abuf->IsPhysicalBuffer()) {
- return fmt::format("readPhysicalAttribute(ftou({}))",
- Visit(abuf->GetPhysicalAddress()));
+ return {fmt::format("ReadPhysicalAttribute({})",
+ Visit(abuf->GetPhysicalAddress()).AsUint()),
+ Type::Float};
}
return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer());
}
@@ -661,59 +807,64 @@ private:
// Direct access
const u32 offset_imm = immediate->GetValue();
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
- return fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
- offset_imm / (4 * 4), (offset_imm / 4) % 4);
+ return {fmt::format("{}[{}][{}]", GetConstBuffer(cbuf->GetIndex()),
+ offset_imm / (4 * 4), (offset_imm / 4) % 4),
+ Type::Uint};
}
if (std::holds_alternative<OperationNode>(*offset)) {
// Indirect access
const std::string final_offset = code.GenerateTemporary();
- code.AddLine("uint {} = ftou({}) >> 2;", final_offset, Visit(offset));
+ code.AddLine("uint {} = {} >> 2;", final_offset, Visit(offset).AsUint());
if (!device.HasComponentIndexingBug()) {
- return fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
- final_offset, final_offset);
+ return {fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
+ final_offset, final_offset),
+ Type::Uint};
}
// AMD's proprietary GLSL compiler emits ill code for variable component access.
// To bypass this driver bug generate 4 ifs, one per each component.
const std::string pack = code.GenerateTemporary();
- code.AddLine("vec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
+ code.AddLine("uvec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
final_offset);
const std::string result = code.GenerateTemporary();
- code.AddLine("float {};", result);
+ code.AddLine("uint {};", result);
for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result,
pack, GetSwizzle(swizzle));
}
- return result;
+ return {result, Type::Uint};
}
UNREACHABLE_MSG("Unmanaged offset node type");
}
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
- const std::string real = Visit(gmem->GetRealAddress());
- const std::string base = Visit(gmem->GetBaseAddress());
- const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base);
- return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
+ const std::string real = Visit(gmem->GetRealAddress()).AsUint();
+ const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
+ const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
+ return {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
+ Type::Uint};
}
if (const auto lmem = std::get_if<LmemNode>(&*node)) {
if (stage == ProgramType::Compute) {
LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
}
- return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
+ return {
+ fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
+ Type::Uint};
}
if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
- return GetInternalFlag(internal_flag->GetFlag());
+ return {GetInternalFlag(internal_flag->GetFlag()), Type::Bool};
}
if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
// It's invalid to call conditional on nested nodes, use an operation instead
- code.AddLine("if ({}) {{", Visit(conditional->GetCondition()));
+ code.AddLine("if ({}) {{", Visit(conditional->GetCondition()).AsBool());
++code.scope;
VisitBlock(conditional->GetCode());
@@ -724,20 +875,21 @@ private:
}
if (const auto comment = std::get_if<CommentNode>(&*node)) {
- return "// " + comment->GetText();
+ code.AddLine("// " + comment->GetText());
+ return {};
}
UNREACHABLE();
return {};
}
- std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
+ Expression ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
const auto GeometryPass = [&](std::string_view name) {
if (stage == ProgramType::Geometry && buffer) {
// TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
// set an 0x80000000 index for those and the shader fails to build. Find out why
// this happens and what's its intent.
- return fmt::format("gs_{}[ftou({}) % MAX_VERTEX_INPUT]", name, Visit(buffer));
+ return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint());
}
return std::string(name);
};
@@ -746,25 +898,27 @@ private:
case Attribute::Index::Position:
switch (stage) {
case ProgramType::Geometry:
- return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer),
- GetSwizzle(element));
+ return {fmt::format("gl_in[{}].gl_Position{}", Visit(buffer).AsUint(),
+ GetSwizzle(element)),
+ Type::Float};
case ProgramType::Fragment:
- return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element));
+ return {element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)),
+ Type::Float};
default:
UNREACHABLE();
}
case Attribute::Index::PointCoord:
switch (element) {
case 0:
- return "gl_PointCoord.x";
+ return {"gl_PointCoord.x", Type::Float};
case 1:
- return "gl_PointCoord.y";
+ return {"gl_PointCoord.y", Type::Float};
case 2:
case 3:
- return "0";
+ return {"0.0f", Type::Float};
}
UNREACHABLE();
- return "0";
+ return {"0", Type::Int};
case Attribute::Index::TessCoordInstanceIDVertexID:
// TODO(Subv): Find out what the values are for the first two elements when inside a
// vertex shader, and what's the value of the fourth element when inside a Tess Eval
@@ -773,44 +927,49 @@ private:
switch (element) {
case 2:
// Config pack's first value is instance_id.
- return "uintBitsToFloat(config_pack[0])";
+ return {"config_pack[0]", Type::Uint};
case 3:
- return "uintBitsToFloat(gl_VertexID)";
+ return {"gl_VertexID", Type::Int};
}
UNIMPLEMENTED_MSG("Unmanaged TessCoordInstanceIDVertexID element={}", element);
- return "0";
+ return {"0", Type::Int};
case Attribute::Index::FrontFacing:
// TODO(Subv): Find out what the values are for the other elements.
ASSERT(stage == ProgramType::Fragment);
switch (element) {
case 3:
- return "itof(gl_FrontFacing ? -1 : 0)";
+ return {"(gl_FrontFacing ? -1 : 0)", Type::Int};
}
UNIMPLEMENTED_MSG("Unmanaged FrontFacing element={}", element);
- return "0";
+ return {"0", Type::Int};
default:
if (IsGenericAttribute(attribute)) {
- return GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element);
+ return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element),
+ Type::Float};
}
break;
}
UNIMPLEMENTED_MSG("Unhandled input attribute: {}", static_cast<u32>(attribute));
- return "0";
+ return {"0", Type::Int};
}
- std::string ApplyPrecise(Operation operation, const std::string& value) {
+ Expression ApplyPrecise(Operation operation, std::string value, Type type) {
if (!IsPrecise(operation)) {
- return value;
+ return {std::move(value), type};
}
- // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders
- const std::string precise = stage != ProgramType::Fragment ? "precise " : "";
+ // Old Nvidia drivers have a bug with precise and texture sampling. These are more likely to
+ // be found in fragment shaders, so we disable precise there. There are vertex shaders that
+ // also fail to build but nobody seems to care about those.
+ // Note: Only bugged drivers will skip precise.
+ const bool disable_precise = device.HasPreciseBug() && stage == ProgramType::Fragment;
- const std::string temporary = code.GenerateTemporary();
- code.AddLine("{}float {} = {};", precise, temporary, value);
- return temporary;
+ std::string temporary = code.GenerateTemporary();
+ code.AddLine("{}{} {} = {};", disable_precise ? "" : "precise ", GetTypeString(type),
+ temporary, value);
+ return {std::move(temporary), type};
}
- std::string VisitOperand(Operation operation, std::size_t operand_index) {
+ Expression VisitOperand(Operation operation, std::size_t operand_index) {
const auto& operand = operation[operand_index];
const bool parent_precise = IsPrecise(operation);
const bool child_precise = IsPrecise(operand);
@@ -819,19 +978,16 @@ private:
return Visit(operand);
}
- const std::string temporary = code.GenerateTemporary();
- code.AddLine("float {} = {};", temporary, Visit(operand));
- return temporary;
- }
-
- std::string VisitOperand(Operation operation, std::size_t operand_index, Type type) {
- return CastOperand(VisitOperand(operation, operand_index), type);
+ Expression value = Visit(operand);
+ std::string temporary = code.GenerateTemporary();
+ code.AddLine("{} {} = {};", GetTypeString(value.GetType()), temporary, value.GetCode());
+ return {std::move(temporary), value.GetType()};
}
- std::optional<std::pair<std::string, bool>> GetOutputAttribute(const AbufNode* abuf) {
+ Expression GetOutputAttribute(const AbufNode* abuf) {
switch (const auto attribute = abuf->GetIndex()) {
case Attribute::Index::Position:
- return std::make_pair("gl_Position"s + GetSwizzle(abuf->GetElement()), false);
+ return {"gl_Position"s + GetSwizzle(abuf->GetElement()), Type::Float};
case Attribute::Index::LayerViewportPointSize:
switch (abuf->GetElement()) {
case 0:
@@ -841,119 +997,79 @@ private:
if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
return {};
}
- return std::make_pair("gl_Layer", true);
+ return {"gl_Layer", Type::Int};
case 2:
if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
return {};
}
- return std::make_pair("gl_ViewportIndex", true);
+ return {"gl_ViewportIndex", Type::Int};
case 3:
UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader");
- return std::make_pair("gl_PointSize", false);
+ return {"gl_PointSize", Type::Float};
}
return {};
case Attribute::Index::ClipDistances0123:
- return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), false);
+ return {fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), Type::Float};
case Attribute::Index::ClipDistances4567:
- return std::make_pair(fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4),
- false);
+ return {fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float};
default:
if (IsGenericAttribute(attribute)) {
- return std::make_pair(
- GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), false);
+ return {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()),
+ Type::Float};
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
return {};
}
}
- std::string CastOperand(const std::string& value, Type type) const {
- switch (type) {
- case Type::Bool:
- case Type::Bool2:
- case Type::Float:
- return value;
- case Type::Int:
- return fmt::format("ftoi({})", value);
- case Type::Uint:
- return fmt::format("ftou({})", value);
- case Type::HalfFloat:
- return fmt::format("toHalf2({})", value);
- }
- UNREACHABLE();
- return value;
- }
-
- std::string BitwiseCastResult(const std::string& value, Type type,
- bool needs_parenthesis = false) {
- switch (type) {
- case Type::Bool:
- case Type::Bool2:
- case Type::Float:
- if (needs_parenthesis) {
- return fmt::format("({})", value);
- }
- return value;
- case Type::Int:
- return fmt::format("itof({})", value);
- case Type::Uint:
- return fmt::format("utof({})", value);
- case Type::HalfFloat:
- return fmt::format("fromHalf2({})", value);
- }
- UNREACHABLE();
- return value;
- }
-
- std::string GenerateUnary(Operation operation, const std::string& func, Type result_type,
- Type type_a, bool needs_parenthesis = true) {
- const std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0, type_a));
-
- return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type, needs_parenthesis));
+ Expression GenerateUnary(Operation operation, std::string_view func, Type result_type,
+ Type type_a) {
+ std::string op_str = fmt::format("{}({})", func, VisitOperand(operation, 0).As(type_a));
+ return ApplyPrecise(operation, std::move(op_str), result_type);
}
- std::string GenerateBinaryInfix(Operation operation, const std::string& func, Type result_type,
- Type type_a, Type type_b) {
- const std::string op_a = VisitOperand(operation, 0, type_a);
- const std::string op_b = VisitOperand(operation, 1, type_b);
- const std::string op_str = fmt::format("({} {} {})", op_a, func, op_b);
+ Expression GenerateBinaryInfix(Operation operation, std::string_view func, Type result_type,
+ Type type_a, Type type_b) {
+ const std::string op_a = VisitOperand(operation, 0).As(type_a);
+ const std::string op_b = VisitOperand(operation, 1).As(type_b);
+ std::string op_str = fmt::format("({} {} {})", op_a, func, op_b);
- return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type));
+ return ApplyPrecise(operation, std::move(op_str), result_type);
}
- std::string GenerateBinaryCall(Operation operation, const std::string& func, Type result_type,
- Type type_a, Type type_b) {
- const std::string op_a = VisitOperand(operation, 0, type_a);
- const std::string op_b = VisitOperand(operation, 1, type_b);
- const std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b);
+ Expression GenerateBinaryCall(Operation operation, std::string_view func, Type result_type,
+ Type type_a, Type type_b) {
+ const std::string op_a = VisitOperand(operation, 0).As(type_a);
+ const std::string op_b = VisitOperand(operation, 1).As(type_b);
+ std::string op_str = fmt::format("{}({}, {})", func, op_a, op_b);
- return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type));
+ return ApplyPrecise(operation, std::move(op_str), result_type);
}
- std::string GenerateTernary(Operation operation, const std::string& func, Type result_type,
- Type type_a, Type type_b, Type type_c) {
- const std::string op_a = VisitOperand(operation, 0, type_a);
- const std::string op_b = VisitOperand(operation, 1, type_b);
- const std::string op_c = VisitOperand(operation, 2, type_c);
- const std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c);
+ Expression GenerateTernary(Operation operation, std::string_view func, Type result_type,
+ Type type_a, Type type_b, Type type_c) {
+ const std::string op_a = VisitOperand(operation, 0).As(type_a);
+ const std::string op_b = VisitOperand(operation, 1).As(type_b);
+ const std::string op_c = VisitOperand(operation, 2).As(type_c);
+ std::string op_str = fmt::format("{}({}, {}, {})", func, op_a, op_b, op_c);
- return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type));
+ return ApplyPrecise(operation, std::move(op_str), result_type);
}
- std::string GenerateQuaternary(Operation operation, const std::string& func, Type result_type,
- Type type_a, Type type_b, Type type_c, Type type_d) {
- const std::string op_a = VisitOperand(operation, 0, type_a);
- const std::string op_b = VisitOperand(operation, 1, type_b);
- const std::string op_c = VisitOperand(operation, 2, type_c);
- const std::string op_d = VisitOperand(operation, 3, type_d);
- const std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d);
+ Expression GenerateQuaternary(Operation operation, const std::string& func, Type result_type,
+ Type type_a, Type type_b, Type type_c, Type type_d) {
+ const std::string op_a = VisitOperand(operation, 0).As(type_a);
+ const std::string op_b = VisitOperand(operation, 1).As(type_b);
+ const std::string op_c = VisitOperand(operation, 2).As(type_c);
+ const std::string op_d = VisitOperand(operation, 3).As(type_d);
+ std::string op_str = fmt::format("{}({}, {}, {}, {})", func, op_a, op_b, op_c, op_d);
- return ApplyPrecise(operation, BitwiseCastResult(op_str, result_type));
+ return ApplyPrecise(operation, std::move(op_str), result_type);
}
std::string GenerateTexture(Operation operation, const std::string& function_suffix,
const std::vector<TextureIR>& extras) {
- constexpr std::array<const char*, 4> coord_constructors = {"float", "vec2", "vec3", "vec4"};
+ constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
@@ -970,17 +1086,17 @@ private:
expr += coord_constructors.at(count + (has_array ? 1 : 0) + (has_shadow ? 1 : 0) - 1);
expr += '(';
for (std::size_t i = 0; i < count; ++i) {
- expr += Visit(operation[i]);
+ expr += Visit(operation[i]).AsFloat();
const std::size_t next = i + 1;
if (next < count)
expr += ", ";
}
if (has_array) {
- expr += ", float(ftoi(" + Visit(meta->array) + "))";
+ expr += ", float(" + Visit(meta->array).AsInt() + ')';
}
if (has_shadow) {
- expr += ", " + Visit(meta->depth_compare);
+ expr += ", " + Visit(meta->depth_compare).AsFloat();
}
expr += ')';
@@ -1011,11 +1127,11 @@ private:
// required to be constant)
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
} else {
- expr += fmt::format("ftoi({})", Visit(operand));
+ expr += Visit(operand).AsInt();
}
break;
case Type::Float:
- expr += Visit(operand);
+ expr += Visit(operand).AsFloat();
break;
default: {
const auto type_int = static_cast<u32>(type);
@@ -1031,7 +1147,7 @@ private:
if (aoffi.empty()) {
return {};
}
- constexpr std::array<const char*, 3> coord_constructors = {"int", "ivec2", "ivec3"};
+ constexpr std::array coord_constructors = {"int", "ivec2", "ivec3"};
std::string expr = ", ";
expr += coord_constructors.at(aoffi.size() - 1);
expr += '(';
@@ -1044,7 +1160,7 @@ private:
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
} else if (device.HasVariableAoffi()) {
// Avoid using variable AOFFI on unsupported devices.
- expr += fmt::format("ftoi({})", Visit(operand));
+ expr += Visit(operand).AsInt();
} else {
// Insert 0 on devices not supporting variable AOFFI.
expr += '0';
@@ -1058,328 +1174,314 @@ private:
return expr;
}
- std::string Assign(Operation operation) {
+ Expression Assign(Operation operation) {
const Node& dest = operation[0];
const Node& src = operation[1];
- std::string target;
- bool is_integer = false;
-
+ Expression target;
if (const auto gpr = std::get_if<GprNode>(&*dest)) {
if (gpr->GetIndex() == Register::ZeroIndex) {
// Writing to Register::ZeroIndex is a no op
return {};
}
- target = GetRegister(gpr->GetIndex());
+ target = {GetRegister(gpr->GetIndex()), Type::Float};
} else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
- const auto result = GetOutputAttribute(abuf);
- if (!result) {
- return {};
- }
- target = result->first;
- is_integer = result->second;
+ target = GetOutputAttribute(abuf);
} else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
if (stage == ProgramType::Compute) {
LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders");
}
- target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
+ target = {
+ fmt::format("{}[{} >> 2]", GetLocalMemory(), Visit(lmem->GetAddress()).AsUint()),
+ Type::Uint};
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
- const std::string real = Visit(gmem->GetRealAddress());
- const std::string base = Visit(gmem->GetBaseAddress());
- const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base);
- target = fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
+ const std::string real = Visit(gmem->GetRealAddress()).AsUint();
+ const std::string base = Visit(gmem->GetBaseAddress()).AsUint();
+ const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
+ target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
+ Type::Uint};
} else {
UNREACHABLE_MSG("Assign called without a proper target");
}
- if (is_integer) {
- code.AddLine("{} = ftoi({});", target, Visit(src));
- } else {
- code.AddLine("{} = {};", target, Visit(src));
- }
+ code.AddLine("{} = {};", target.GetCode(), Visit(src).As(target.GetType()));
return {};
}
template <Type type>
- std::string Add(Operation operation) {
+ Expression Add(Operation operation) {
return GenerateBinaryInfix(operation, "+", type, type, type);
}
template <Type type>
- std::string Mul(Operation operation) {
+ Expression Mul(Operation operation) {
return GenerateBinaryInfix(operation, "*", type, type, type);
}
template <Type type>
- std::string Div(Operation operation) {
+ Expression Div(Operation operation) {
return GenerateBinaryInfix(operation, "/", type, type, type);
}
template <Type type>
- std::string Fma(Operation operation) {
+ Expression Fma(Operation operation) {
return GenerateTernary(operation, "fma", type, type, type, type);
}
template <Type type>
- std::string Negate(Operation operation) {
- return GenerateUnary(operation, "-", type, type, true);
+ Expression Negate(Operation operation) {
+ return GenerateUnary(operation, "-", type, type);
}
template <Type type>
- std::string Absolute(Operation operation) {
- return GenerateUnary(operation, "abs", type, type, false);
+ Expression Absolute(Operation operation) {
+ return GenerateUnary(operation, "abs", type, type);
}
- std::string FClamp(Operation operation) {
+ Expression FClamp(Operation operation) {
return GenerateTernary(operation, "clamp", Type::Float, Type::Float, Type::Float,
Type::Float);
}
- std::string FCastHalf0(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat);
- return fmt::format("({})[0]", op_a);
+ Expression FCastHalf0(Operation operation) {
+ return {fmt::format("({})[0]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
}
- std::string FCastHalf1(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat);
- return fmt::format("({})[1]", op_a);
+ Expression FCastHalf1(Operation operation) {
+ return {fmt::format("({})[1]", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
}
template <Type type>
- std::string Min(Operation operation) {
+ Expression Min(Operation operation) {
return GenerateBinaryCall(operation, "min", type, type, type);
}
template <Type type>
- std::string Max(Operation operation) {
+ Expression Max(Operation operation) {
return GenerateBinaryCall(operation, "max", type, type, type);
}
- std::string Select(Operation operation) {
- const std::string condition = Visit(operation[0]);
- const std::string true_case = Visit(operation[1]);
- const std::string false_case = Visit(operation[2]);
- const std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case);
+ Expression Select(Operation operation) {
+ const std::string condition = Visit(operation[0]).AsBool();
+ const std::string true_case = Visit(operation[1]).AsUint();
+ const std::string false_case = Visit(operation[2]).AsUint();
+ std::string op_str = fmt::format("({} ? {} : {})", condition, true_case, false_case);
- return ApplyPrecise(operation, op_str);
+ return ApplyPrecise(operation, std::move(op_str), Type::Uint);
}
- std::string FCos(Operation operation) {
- return GenerateUnary(operation, "cos", Type::Float, Type::Float, false);
+ Expression FCos(Operation operation) {
+ return GenerateUnary(operation, "cos", Type::Float, Type::Float);
}
- std::string FSin(Operation operation) {
- return GenerateUnary(operation, "sin", Type::Float, Type::Float, false);
+ Expression FSin(Operation operation) {
+ return GenerateUnary(operation, "sin", Type::Float, Type::Float);
}
- std::string FExp2(Operation operation) {
- return GenerateUnary(operation, "exp2", Type::Float, Type::Float, false);
+ Expression FExp2(Operation operation) {
+ return GenerateUnary(operation, "exp2", Type::Float, Type::Float);
}
- std::string FLog2(Operation operation) {
- return GenerateUnary(operation, "log2", Type::Float, Type::Float, false);
+ Expression FLog2(Operation operation) {
+ return GenerateUnary(operation, "log2", Type::Float, Type::Float);
}
- std::string FInverseSqrt(Operation operation) {
- return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float, false);
+ Expression FInverseSqrt(Operation operation) {
+ return GenerateUnary(operation, "inversesqrt", Type::Float, Type::Float);
}
- std::string FSqrt(Operation operation) {
- return GenerateUnary(operation, "sqrt", Type::Float, Type::Float, false);
+ Expression FSqrt(Operation operation) {
+ return GenerateUnary(operation, "sqrt", Type::Float, Type::Float);
}
- std::string FRoundEven(Operation operation) {
- return GenerateUnary(operation, "roundEven", Type::Float, Type::Float, false);
+ Expression FRoundEven(Operation operation) {
+ return GenerateUnary(operation, "roundEven", Type::Float, Type::Float);
}
- std::string FFloor(Operation operation) {
- return GenerateUnary(operation, "floor", Type::Float, Type::Float, false);
+ Expression FFloor(Operation operation) {
+ return GenerateUnary(operation, "floor", Type::Float, Type::Float);
}
- std::string FCeil(Operation operation) {
- return GenerateUnary(operation, "ceil", Type::Float, Type::Float, false);
+ Expression FCeil(Operation operation) {
+ return GenerateUnary(operation, "ceil", Type::Float, Type::Float);
}
- std::string FTrunc(Operation operation) {
- return GenerateUnary(operation, "trunc", Type::Float, Type::Float, false);
+ Expression FTrunc(Operation operation) {
+ return GenerateUnary(operation, "trunc", Type::Float, Type::Float);
}
template <Type type>
- std::string FCastInteger(Operation operation) {
- return GenerateUnary(operation, "float", Type::Float, type, false);
+ Expression FCastInteger(Operation operation) {
+ return GenerateUnary(operation, "float", Type::Float, type);
}
- std::string ICastFloat(Operation operation) {
- return GenerateUnary(operation, "int", Type::Int, Type::Float, false);
+ Expression ICastFloat(Operation operation) {
+ return GenerateUnary(operation, "int", Type::Int, Type::Float);
}
- std::string ICastUnsigned(Operation operation) {
- return GenerateUnary(operation, "int", Type::Int, Type::Uint, false);
+ Expression ICastUnsigned(Operation operation) {
+ return GenerateUnary(operation, "int", Type::Int, Type::Uint);
}
template <Type type>
- std::string LogicalShiftLeft(Operation operation) {
+ Expression LogicalShiftLeft(Operation operation) {
return GenerateBinaryInfix(operation, "<<", type, type, Type::Uint);
}
- std::string ILogicalShiftRight(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0, Type::Uint);
- const std::string op_b = VisitOperand(operation, 1, Type::Uint);
- const std::string op_str = fmt::format("int({} >> {})", op_a, op_b);
+ Expression ILogicalShiftRight(Operation operation) {
+ const std::string op_a = VisitOperand(operation, 0).AsUint();
+ const std::string op_b = VisitOperand(operation, 1).AsUint();
+ std::string op_str = fmt::format("int({} >> {})", op_a, op_b);
- return ApplyPrecise(operation, BitwiseCastResult(op_str, Type::Int));
+ return ApplyPrecise(operation, std::move(op_str), Type::Int);
}
- std::string IArithmeticShiftRight(Operation operation) {
+ Expression IArithmeticShiftRight(Operation operation) {
return GenerateBinaryInfix(operation, ">>", Type::Int, Type::Int, Type::Uint);
}
template <Type type>
- std::string BitwiseAnd(Operation operation) {
+ Expression BitwiseAnd(Operation operation) {
return GenerateBinaryInfix(operation, "&", type, type, type);
}
template <Type type>
- std::string BitwiseOr(Operation operation) {
+ Expression BitwiseOr(Operation operation) {
return GenerateBinaryInfix(operation, "|", type, type, type);
}
template <Type type>
- std::string BitwiseXor(Operation operation) {
+ Expression BitwiseXor(Operation operation) {
return GenerateBinaryInfix(operation, "^", type, type, type);
}
template <Type type>
- std::string BitwiseNot(Operation operation) {
- return GenerateUnary(operation, "~", type, type, false);
+ Expression BitwiseNot(Operation operation) {
+ return GenerateUnary(operation, "~", type, type);
}
- std::string UCastFloat(Operation operation) {
- return GenerateUnary(operation, "uint", Type::Uint, Type::Float, false);
+ Expression UCastFloat(Operation operation) {
+ return GenerateUnary(operation, "uint", Type::Uint, Type::Float);
}
- std::string UCastSigned(Operation operation) {
- return GenerateUnary(operation, "uint", Type::Uint, Type::Int, false);
+ Expression UCastSigned(Operation operation) {
+ return GenerateUnary(operation, "uint", Type::Uint, Type::Int);
}
- std::string UShiftRight(Operation operation) {
+ Expression UShiftRight(Operation operation) {
return GenerateBinaryInfix(operation, ">>", Type::Uint, Type::Uint, Type::Uint);
}
template <Type type>
- std::string BitfieldInsert(Operation operation) {
+ Expression BitfieldInsert(Operation operation) {
return GenerateQuaternary(operation, "bitfieldInsert", type, type, type, Type::Int,
Type::Int);
}
template <Type type>
- std::string BitfieldExtract(Operation operation) {
+ Expression BitfieldExtract(Operation operation) {
return GenerateTernary(operation, "bitfieldExtract", type, type, Type::Int, Type::Int);
}
template <Type type>
- std::string BitCount(Operation operation) {
- return GenerateUnary(operation, "bitCount", type, type, false);
+ Expression BitCount(Operation operation) {
+ return GenerateUnary(operation, "bitCount", type, type);
}
- std::string HNegate(Operation operation) {
+ Expression HNegate(Operation operation) {
const auto GetNegate = [&](std::size_t index) {
- return VisitOperand(operation, index, Type::Bool) + " ? -1 : 1";
+ return VisitOperand(operation, index).AsBool() + " ? -1 : 1";
};
- const std::string value =
- fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0, Type::HalfFloat),
- GetNegate(1), GetNegate(2));
- return BitwiseCastResult(value, Type::HalfFloat);
- }
-
- std::string HClamp(Operation operation) {
- const std::string value = VisitOperand(operation, 0, Type::HalfFloat);
- const std::string min = VisitOperand(operation, 1, Type::Float);
- const std::string max = VisitOperand(operation, 2, Type::Float);
- const std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max);
-
- return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat));
- }
-
- std::string HCastFloat(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0, Type::Float);
- return fmt::format("fromHalf2(vec2({}, 0.0f))", op_a);
- }
-
- std::string HUnpack(Operation operation) {
- const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)};
- const auto value = [&]() -> std::string {
- switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
- case Tegra::Shader::HalfType::H0_H1:
- return operand;
- case Tegra::Shader::HalfType::F32:
- return fmt::format("vec2(fromHalf2({}))", operand);
- case Tegra::Shader::HalfType::H0_H0:
- return fmt::format("vec2({}[0])", operand);
- case Tegra::Shader::HalfType::H1_H1:
- return fmt::format("vec2({}[1])", operand);
- }
- UNREACHABLE();
- return "0";
- }();
- return fmt::format("fromHalf2({})", value);
+ return {fmt::format("({} * vec2({}, {}))", VisitOperand(operation, 0).AsHalfFloat(),
+ GetNegate(1), GetNegate(2)),
+ Type::HalfFloat};
+ }
+
+ Expression HClamp(Operation operation) {
+ const std::string value = VisitOperand(operation, 0).AsHalfFloat();
+ const std::string min = VisitOperand(operation, 1).AsFloat();
+ const std::string max = VisitOperand(operation, 2).AsFloat();
+ std::string clamped = fmt::format("clamp({}, vec2({}), vec2({}))", value, min, max);
+
+ return ApplyPrecise(operation, std::move(clamped), Type::HalfFloat);
+ }
+
+ Expression HCastFloat(Operation operation) {
+ return {fmt::format("vec2({})", VisitOperand(operation, 0).AsFloat()), Type::HalfFloat};
}
- std::string HMergeF32(Operation operation) {
- return fmt::format("float(toHalf2({})[0])", Visit(operation[0]));
+ Expression HUnpack(Operation operation) {
+ Expression operand = VisitOperand(operation, 0);
+ switch (std::get<Tegra::Shader::HalfType>(operation.GetMeta())) {
+ case Tegra::Shader::HalfType::H0_H1:
+ return operand;
+ case Tegra::Shader::HalfType::F32:
+ return {fmt::format("vec2({})", operand.AsFloat()), Type::HalfFloat};
+ case Tegra::Shader::HalfType::H0_H0:
+ return {fmt::format("vec2({}[0])", operand.AsHalfFloat()), Type::HalfFloat};
+ case Tegra::Shader::HalfType::H1_H1:
+ return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat};
+ }
+ }
+
+ Expression HMergeF32(Operation operation) {
+ return {fmt::format("float({}[0])", VisitOperand(operation, 0).AsHalfFloat()), Type::Float};
}
- std::string HMergeH0(Operation operation) {
- return fmt::format("fromHalf2(vec2(toHalf2({})[0], toHalf2({})[1]))", Visit(operation[1]),
- Visit(operation[0]));
+ Expression HMergeH0(Operation operation) {
+ std::string dest = VisitOperand(operation, 0).AsUint();
+ std::string src = VisitOperand(operation, 1).AsUint();
+ return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", src, dest), Type::Uint};
}
- std::string HMergeH1(Operation operation) {
- return fmt::format("fromHalf2(vec2(toHalf2({})[0], toHalf2({})[1]))", Visit(operation[0]),
- Visit(operation[1]));
+ Expression HMergeH1(Operation operation) {
+ std::string dest = VisitOperand(operation, 0).AsUint();
+ std::string src = VisitOperand(operation, 1).AsUint();
+ return {fmt::format("(({} & 0x0000FFFFU) | ({} & 0xFFFF0000U))", dest, src), Type::Uint};
}
- std::string HPack2(Operation operation) {
- return fmt::format("utof(packHalf2x16(vec2({}, {})))", Visit(operation[0]),
- Visit(operation[1]));
+ Expression HPack2(Operation operation) {
+ return {fmt::format("vec2({}, {})", VisitOperand(operation, 0).AsFloat(),
+ VisitOperand(operation, 1).AsFloat()),
+ Type::HalfFloat};
}
template <Type type>
- std::string LogicalLessThan(Operation operation) {
+ Expression LogicalLessThan(Operation operation) {
return GenerateBinaryInfix(operation, "<", Type::Bool, type, type);
}
template <Type type>
- std::string LogicalEqual(Operation operation) {
+ Expression LogicalEqual(Operation operation) {
return GenerateBinaryInfix(operation, "==", Type::Bool, type, type);
}
template <Type type>
- std::string LogicalLessEqual(Operation operation) {
+ Expression LogicalLessEqual(Operation operation) {
return GenerateBinaryInfix(operation, "<=", Type::Bool, type, type);
}
template <Type type>
- std::string LogicalGreaterThan(Operation operation) {
+ Expression LogicalGreaterThan(Operation operation) {
return GenerateBinaryInfix(operation, ">", Type::Bool, type, type);
}
template <Type type>
- std::string LogicalNotEqual(Operation operation) {
+ Expression LogicalNotEqual(Operation operation) {
return GenerateBinaryInfix(operation, "!=", Type::Bool, type, type);
}
template <Type type>
- std::string LogicalGreaterEqual(Operation operation) {
+ Expression LogicalGreaterEqual(Operation operation) {
return GenerateBinaryInfix(operation, ">=", Type::Bool, type, type);
}
- std::string LogicalFIsNan(Operation operation) {
- return GenerateUnary(operation, "isnan", Type::Bool, Type::Float, false);
+ Expression LogicalFIsNan(Operation operation) {
+ return GenerateUnary(operation, "isnan", Type::Bool, Type::Float);
}
- std::string LogicalAssign(Operation operation) {
+ Expression LogicalAssign(Operation operation) {
const Node& dest = operation[0];
const Node& src = operation[1];
@@ -1400,78 +1502,80 @@ private:
target = GetInternalFlag(flag->GetFlag());
}
- code.AddLine("{} = {};", target, Visit(src));
+ code.AddLine("{} = {};", target, Visit(src).AsBool());
return {};
}
- std::string LogicalAnd(Operation operation) {
+ Expression LogicalAnd(Operation operation) {
return GenerateBinaryInfix(operation, "&&", Type::Bool, Type::Bool, Type::Bool);
}
- std::string LogicalOr(Operation operation) {
+ Expression LogicalOr(Operation operation) {
return GenerateBinaryInfix(operation, "||", Type::Bool, Type::Bool, Type::Bool);
}
- std::string LogicalXor(Operation operation) {
+ Expression LogicalXor(Operation operation) {
return GenerateBinaryInfix(operation, "^^", Type::Bool, Type::Bool, Type::Bool);
}
- std::string LogicalNegate(Operation operation) {
- return GenerateUnary(operation, "!", Type::Bool, Type::Bool, false);
+ Expression LogicalNegate(Operation operation) {
+ return GenerateUnary(operation, "!", Type::Bool, Type::Bool);
}
- std::string LogicalPick2(Operation operation) {
- const std::string pair = VisitOperand(operation, 0, Type::Bool2);
- return fmt::format("{}[{}]", pair, VisitOperand(operation, 1, Type::Uint));
+ Expression LogicalPick2(Operation operation) {
+ return {fmt::format("{}[{}]", VisitOperand(operation, 0).AsBool2(),
+ VisitOperand(operation, 1).AsUint()),
+ Type::Bool};
}
- std::string LogicalAnd2(Operation operation) {
+ Expression LogicalAnd2(Operation operation) {
return GenerateUnary(operation, "all", Type::Bool, Type::Bool2);
}
template <bool with_nan>
- std::string GenerateHalfComparison(Operation operation, const std::string& compare_op) {
- const std::string comparison{GenerateBinaryCall(operation, compare_op, Type::Bool2,
- Type::HalfFloat, Type::HalfFloat)};
+ Expression GenerateHalfComparison(Operation operation, std::string_view compare_op) {
+ Expression comparison = GenerateBinaryCall(operation, compare_op, Type::Bool2,
+ Type::HalfFloat, Type::HalfFloat);
if constexpr (!with_nan) {
return comparison;
}
- return fmt::format("halfFloatNanComparison({}, {}, {})", comparison,
- VisitOperand(operation, 0, Type::HalfFloat),
- VisitOperand(operation, 1, Type::HalfFloat));
+ return {fmt::format("HalfFloatNanComparison({}, {}, {})", comparison.AsBool2(),
+ VisitOperand(operation, 0).AsHalfFloat(),
+ VisitOperand(operation, 1).AsHalfFloat()),
+ Type::Bool2};
}
template <bool with_nan>
- std::string Logical2HLessThan(Operation operation) {
+ Expression Logical2HLessThan(Operation operation) {
return GenerateHalfComparison<with_nan>(operation, "lessThan");
}
template <bool with_nan>
- std::string Logical2HEqual(Operation operation) {
+ Expression Logical2HEqual(Operation operation) {
return GenerateHalfComparison<with_nan>(operation, "equal");
}
template <bool with_nan>
- std::string Logical2HLessEqual(Operation operation) {
+ Expression Logical2HLessEqual(Operation operation) {
return GenerateHalfComparison<with_nan>(operation, "lessThanEqual");
}
template <bool with_nan>
- std::string Logical2HGreaterThan(Operation operation) {
+ Expression Logical2HGreaterThan(Operation operation) {
return GenerateHalfComparison<with_nan>(operation, "greaterThan");
}
template <bool with_nan>
- std::string Logical2HNotEqual(Operation operation) {
+ Expression Logical2HNotEqual(Operation operation) {
return GenerateHalfComparison<with_nan>(operation, "notEqual");
}
template <bool with_nan>
- std::string Logical2HGreaterEqual(Operation operation) {
+ Expression Logical2HGreaterEqual(Operation operation) {
return GenerateHalfComparison<with_nan>(operation, "greaterThanEqual");
}
- std::string Texture(Operation operation) {
+ Expression Texture(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
@@ -1480,10 +1584,10 @@ private:
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
- return expr + GetSwizzle(meta->element);
+ return {expr + GetSwizzle(meta->element), Type::Float};
}
- std::string TextureLod(Operation operation) {
+ Expression TextureLod(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
@@ -1492,54 +1596,54 @@ private:
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
- return expr + GetSwizzle(meta->element);
+ return {expr + GetSwizzle(meta->element), Type::Float};
}
- std::string TextureGather(Operation operation) {
+ Expression TextureGather(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
- return GenerateTexture(operation, "Gather",
- {TextureArgument{type, meta->component}, TextureAoffi{}}) +
- GetSwizzle(meta->element);
+ return {GenerateTexture(operation, "Gather",
+ {TextureArgument{type, meta->component}, TextureAoffi{}}) +
+ GetSwizzle(meta->element),
+ Type::Float};
}
- std::string TextureQueryDimensions(Operation operation) {
+ Expression TextureQueryDimensions(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
const std::string sampler = GetSampler(meta->sampler);
- const std::string lod = VisitOperand(operation, 0, Type::Int);
+ const std::string lod = VisitOperand(operation, 0).AsInt();
switch (meta->element) {
case 0:
case 1:
- return fmt::format("itof(int(textureSize({}, {}){}))", sampler, lod,
- GetSwizzle(meta->element));
- case 2:
- return "0";
+ return {fmt::format("textureSize({}, {}){}", sampler, lod, GetSwizzle(meta->element)),
+ Type::Int};
case 3:
- return fmt::format("itof(textureQueryLevels({}))", sampler);
+ return {fmt::format("textureQueryLevels({})", sampler), Type::Int};
}
UNREACHABLE();
- return "0";
+ return {"0", Type::Int};
}
- std::string TextureQueryLod(Operation operation) {
+ Expression TextureQueryLod(Operation operation) {
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
if (meta->element < 2) {
- return fmt::format("itof(int(({} * vec2(256)){}))",
- GenerateTexture(operation, "QueryLod", {}),
- GetSwizzle(meta->element));
+ return {fmt::format("int(({} * vec2(256)){})",
+ GenerateTexture(operation, "QueryLod", {}),
+ GetSwizzle(meta->element)),
+ Type::Int};
}
- return "0";
+ return {"0", Type::Int};
}
- std::string TexelFetch(Operation operation) {
- constexpr std::array<const char*, 4> constructors = {"int", "ivec2", "ivec3", "ivec4"};
+ Expression TexelFetch(Operation operation) {
+ constexpr std::array constructors = {"int", "ivec2", "ivec3", "ivec4"};
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
UNIMPLEMENTED_IF(meta->sampler.IsArray());
@@ -1552,7 +1656,7 @@ private:
expr += constructors.at(operation.GetOperandsCount() - 1);
expr += '(';
for (std::size_t i = 0; i < count; ++i) {
- expr += VisitOperand(operation, i, Type::Int);
+ expr += VisitOperand(operation, i).AsInt();
const std::size_t next = i + 1;
if (next == count)
expr += ')';
@@ -1565,7 +1669,7 @@ private:
if (meta->lod) {
expr += ", ";
- expr += CastOperand(Visit(meta->lod), Type::Int);
+ expr += Visit(meta->lod).AsInt();
}
expr += ')';
expr += GetSwizzle(meta->element);
@@ -1580,11 +1684,11 @@ private:
code.AddLine("float {} = {};", tmp, expr);
code.AddLine("#endif");
- return tmp;
+ return {tmp, Type::Float};
}
- std::string ImageStore(Operation operation) {
- constexpr std::array<const char*, 4> constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
+ Expression ImageStore(Operation operation) {
+ constexpr std::array constructors{"int(", "ivec2(", "ivec3(", "ivec4("};
const auto meta{std::get<MetaImage>(operation.GetMeta())};
std::string expr = "imageStore(";
@@ -1594,7 +1698,7 @@ private:
const std::size_t coords_count{operation.GetOperandsCount()};
expr += constructors.at(coords_count - 1);
for (std::size_t i = 0; i < coords_count; ++i) {
- expr += VisitOperand(operation, i, Type::Int);
+ expr += VisitOperand(operation, i).AsInt();
if (i + 1 < coords_count) {
expr += ", ";
}
@@ -1605,7 +1709,7 @@ private:
UNIMPLEMENTED_IF(values_count != 4);
expr += "vec4(";
for (std::size_t i = 0; i < values_count; ++i) {
- expr += Visit(meta.values.at(i));
+ expr += Visit(meta.values.at(i)).AsFloat();
if (i + 1 < values_count) {
expr += ", ";
}
@@ -1616,52 +1720,52 @@ private:
return {};
}
- std::string Branch(Operation operation) {
+ Expression Branch(Operation operation) {
const auto target = std::get_if<ImmediateNode>(&*operation[0]);
UNIMPLEMENTED_IF(!target);
- code.AddLine("jmp_to = 0x{:x}u;", target->GetValue());
+ code.AddLine("jmp_to = 0x{:X}U;", target->GetValue());
code.AddLine("break;");
return {};
}
- std::string BranchIndirect(Operation operation) {
- const std::string op_a = VisitOperand(operation, 0, Type::Uint);
+ Expression BranchIndirect(Operation operation) {
+ const std::string op_a = VisitOperand(operation, 0).AsUint();
code.AddLine("jmp_to = {};", op_a);
code.AddLine("break;");
return {};
}
- std::string PushFlowStack(Operation operation) {
+ Expression PushFlowStack(Operation operation) {
const auto stack = std::get<MetaStackClass>(operation.GetMeta());
const auto target = std::get_if<ImmediateNode>(&*operation[0]);
UNIMPLEMENTED_IF(!target);
- code.AddLine("{}[{}++] = 0x{:x}u;", FlowStackName(stack), FlowStackTopName(stack),
+ code.AddLine("{}[{}++] = 0x{:X}U;", FlowStackName(stack), FlowStackTopName(stack),
target->GetValue());
return {};
}
- std::string PopFlowStack(Operation operation) {
+ Expression PopFlowStack(Operation operation) {
const auto stack = std::get<MetaStackClass>(operation.GetMeta());
code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack));
code.AddLine("break;");
return {};
}
- std::string Exit(Operation operation) {
+ Expression Exit(Operation operation) {
if (stage != ProgramType::Fragment) {
code.AddLine("return;");
return {};
}
const auto& used_registers = ir.GetRegisters();
- const auto SafeGetRegister = [&](u32 reg) -> std::string {
+ const auto SafeGetRegister = [&](u32 reg) -> Expression {
// TODO(Rodrigo): Replace with contains once C++20 releases
if (used_registers.find(reg) != used_registers.end()) {
- return GetRegister(reg);
+ return {GetRegister(reg), Type::Float};
}
- return "0.0f";
+ return {"0.0f", Type::Float};
};
UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented");
@@ -1674,7 +1778,7 @@ private:
for (u32 component = 0; component < 4; ++component) {
if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
code.AddLine("FragColor{}[{}] = {};", render_target, component,
- SafeGetRegister(current_reg));
+ SafeGetRegister(current_reg).AsFloat());
++current_reg;
}
}
@@ -1683,14 +1787,14 @@ private:
if (header.ps.omap.depth) {
// The depth output is always 2 registers after the last color output, and current_reg
// already contains one past the last color register.
- code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1));
+ code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat());
}
code.AddLine("return;");
return {};
}
- std::string Discard(Operation operation) {
+ Expression Discard(Operation operation) {
// Enclose "discard" in a conditional, so that GLSL compilation does not complain
// about unexecuted instructions that may follow this.
code.AddLine("if (true) {{");
@@ -1701,7 +1805,7 @@ private:
return {};
}
- std::string EmitVertex(Operation operation) {
+ Expression EmitVertex(Operation operation) {
ASSERT_MSG(stage == ProgramType::Geometry,
"EmitVertex is expected to be used in a geometry shader.");
@@ -1712,7 +1816,7 @@ private:
return {};
}
- std::string EndPrimitive(Operation operation) {
+ Expression EndPrimitive(Operation operation) {
ASSERT_MSG(stage == ProgramType::Geometry,
"EndPrimitive is expected to be used in a geometry shader.");
@@ -1720,19 +1824,61 @@ private:
return {};
}
- std::string YNegate(Operation operation) {
+ Expression YNegate(Operation operation) {
// Config pack's third value is Y_NEGATE's state.
- return "uintBitsToFloat(config_pack[2])";
+ return {"config_pack[2]", Type::Uint};
}
template <u32 element>
- std::string LocalInvocationId(Operation) {
- return "utof(gl_LocalInvocationID"s + GetSwizzle(element) + ')';
+ Expression LocalInvocationId(Operation) {
+ return {"gl_LocalInvocationID"s + GetSwizzle(element), Type::Uint};
}
template <u32 element>
- std::string WorkGroupId(Operation) {
- return "utof(gl_WorkGroupID"s + GetSwizzle(element) + ')';
+ Expression WorkGroupId(Operation) {
+ return {"gl_WorkGroupID"s + GetSwizzle(element), Type::Uint};
+ }
+
+ Expression BallotThread(Operation operation) {
+ const std::string value = VisitOperand(operation, 0).AsBool();
+ if (!device.HasWarpIntrinsics()) {
+ LOG_ERROR(Render_OpenGL,
+ "Nvidia warp intrinsics are not available and its required by a shader");
+ // Stub on non-Nvidia devices by simulating all threads voting the same as the active
+ // one.
+ return {fmt::format("({} ? 0xFFFFFFFFU : 0U)", value), Type::Uint};
+ }
+ return {fmt::format("ballotThreadNV({})", value), Type::Uint};
+ }
+
+ Expression Vote(Operation operation, const char* func) {
+ const std::string value = VisitOperand(operation, 0).AsBool();
+ if (!device.HasWarpIntrinsics()) {
+ LOG_ERROR(Render_OpenGL,
+ "Nvidia vote intrinsics are not available and its required by a shader");
+ // Stub with a warp size of one.
+ return {value, Type::Bool};
+ }
+ return {fmt::format("{}({})", func, value), Type::Bool};
+ }
+
+ Expression VoteAll(Operation operation) {
+ return Vote(operation, "allThreadsNV");
+ }
+
+ Expression VoteAny(Operation operation) {
+ return Vote(operation, "anyThreadNV");
+ }
+
+ Expression VoteEqual(Operation operation) {
+ if (!device.HasWarpIntrinsics()) {
+ LOG_ERROR(Render_OpenGL,
+ "Nvidia vote intrinsics are not available and its required by a shader");
+ // We must return true here since a stub for a theoretical warp size of 1 will always
+ // return an equal result for all its votes.
+ return {"true", Type::Bool};
+ }
+ return Vote(operation, "allThreadsEqualNV");
}
static constexpr std::array operation_decompilers = {
@@ -1885,6 +2031,11 @@ private:
&GLSLDecompiler::WorkGroupId<0>,
&GLSLDecompiler::WorkGroupId<1>,
&GLSLDecompiler::WorkGroupId<2>,
+
+ &GLSLDecompiler::BallotThread,
+ &GLSLDecompiler::VoteAll,
+ &GLSLDecompiler::VoteAny,
+ &GLSLDecompiler::VoteEqual,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
@@ -1926,8 +2077,8 @@ private:
}
std::string GetInternalFlag(InternalFlag flag) const {
- constexpr std::array<const char*, 4> InternalFlagNames = {"zero_flag", "sign_flag",
- "carry_flag", "overflow_flag"};
+ constexpr std::array InternalFlagNames = {"zero_flag", "sign_flag", "carry_flag",
+ "overflow_flag"};
const auto index = static_cast<u32>(flag);
ASSERT(index < static_cast<u32>(InternalFlag::Amount));
@@ -1975,24 +2126,16 @@ private:
std::string GetCommonDeclarations() {
return fmt::format(
- "#define MAX_CONSTBUFFER_ELEMENTS {}\n"
"#define ftoi floatBitsToInt\n"
"#define ftou floatBitsToUint\n"
"#define itof intBitsToFloat\n"
"#define utof uintBitsToFloat\n\n"
- "float fromHalf2(vec2 pair) {{\n"
- " return utof(packHalf2x16(pair));\n"
- "}}\n\n"
- "vec2 toHalf2(float value) {{\n"
- " return unpackHalf2x16(ftou(value));\n"
- "}}\n\n"
- "bvec2 halfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n"
+ "bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n"
" bvec2 is_nan1 = isnan(pair1);\n"
" bvec2 is_nan2 = isnan(pair2);\n"
" return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || "
"is_nan2.y);\n"
- "}}\n",
- MAX_CONSTBUFFER_ELEMENTS);
+ "}}\n\n");
}
ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 408332f90..4f135fe03 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -184,6 +184,9 @@ GLint GetSwizzleSource(SwizzleSource source) {
}
void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) {
+ if (params.IsBuffer()) {
+ return;
+ }
glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
@@ -208,6 +211,7 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte
glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(),
nullptr, GL_DYNAMIC_STORAGE_BIT);
glTextureBuffer(texture.handle, internal_format, texture_buffer.handle);
+ break;
case SurfaceTarget::Texture2D:
case SurfaceTarget::TextureCubemap:
glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width,
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index ff6ab6988..21324488a 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -51,7 +51,7 @@ public:
}
protected:
- void DecorateSurfaceName();
+ void DecorateSurfaceName() override;
View CreateView(const ViewParams& view_key) override;
View CreateViewInner(const ViewParams& view_key, bool is_proxy);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index a05cef3b9..af9684839 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -101,9 +101,7 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst
RendererOpenGL::~RendererOpenGL() = default;
-void RendererOpenGL::SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) {
-
+void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
system.GetPerfStats().EndSystemFrame();
// Maintain the rasterizer's state as a priority
@@ -113,9 +111,9 @@ void RendererOpenGL::SwapBuffers(
if (framebuffer) {
// If framebuffer is provided, reload it from memory to a texture
- if (screen_info.texture.width != (GLsizei)framebuffer->get().width ||
- screen_info.texture.height != (GLsizei)framebuffer->get().height ||
- screen_info.texture.pixel_format != framebuffer->get().pixel_format) {
+ if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) ||
+ screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) ||
+ screen_info.texture.pixel_format != framebuffer->pixel_format) {
// Reallocate texture if the framebuffer size has changed.
// This is expected to not happen very often and hence should not be a
// performance problem.
@@ -149,43 +147,43 @@ void RendererOpenGL::SwapBuffers(
* Loads framebuffer from emulated memory into the active OpenGL texture.
*/
void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) {
- const u32 bytes_per_pixel{Tegra::FramebufferConfig::BytesPerPixel(framebuffer.pixel_format)};
- const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
- const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
-
// Framebuffer orientation handling
framebuffer_transform_flags = framebuffer.transform_flags;
framebuffer_crop_rect = framebuffer.crop_rect;
- // Ensure no bad interactions with GL_UNPACK_ALIGNMENT, which by default
- // only allows rows to have a memory alignement of 4.
- ASSERT(framebuffer.stride % 4 == 0);
-
- if (!rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
- // Reset the screen info's display texture to its own permanent texture
- screen_info.display_texture = screen_info.texture.resource.handle;
-
- rasterizer->FlushRegion(ToCacheAddr(Memory::GetPointer(framebuffer_addr)), size_in_bytes);
-
- constexpr u32 linear_bpp = 4;
- VideoCore::MortonCopyPixels128(VideoCore::MortonSwizzleMode::MortonToLinear,
- framebuffer.width, framebuffer.height, bytes_per_pixel,
- linear_bpp, Memory::GetPointer(framebuffer_addr),
- gl_framebuffer_data.data());
-
- glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
+ const VAddr framebuffer_addr{framebuffer.address + framebuffer.offset};
+ if (rasterizer->AccelerateDisplay(framebuffer, framebuffer_addr, framebuffer.stride)) {
+ return;
+ }
- // Update existing texture
- // TODO: Test what happens on hardware when you change the framebuffer dimensions so that
- // they differ from the LCD resolution.
- // TODO: Applications could theoretically crash yuzu here by specifying too large
- // framebuffer sizes. We should make sure that this cannot happen.
- glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width,
- framebuffer.height, screen_info.texture.gl_format,
- screen_info.texture.gl_type, gl_framebuffer_data.data());
+ // Reset the screen info's display texture to its own permanent texture
+ screen_info.display_texture = screen_info.texture.resource.handle;
- glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
- }
+ const auto pixel_format{
+ VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
+ const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
+ const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
+ const auto host_ptr{Memory::GetPointer(framebuffer_addr)};
+ rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);
+
+ // TODO(Rodrigo): Read this from HLE
+ constexpr u32 block_height_log2 = 4;
+ VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format,
+ framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1,
+ gl_framebuffer_data.data(), host_ptr);
+
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
+
+ // Update existing texture
+ // TODO: Test what happens on hardware when you change the framebuffer dimensions so that
+ // they differ from the LCD resolution.
+ // TODO: Applications could theoretically crash yuzu here by specifying too large
+ // framebuffer sizes. We should make sure that this cannot happen.
+ glTextureSubImage2D(screen_info.texture.resource.handle, 0, 0, 0, framebuffer.width,
+ framebuffer.height, screen_info.texture.gl_format,
+ screen_info.texture.gl_type, gl_framebuffer_data.data());
+
+ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
}
/**
@@ -276,22 +274,29 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
texture.height = framebuffer.height;
texture.pixel_format = framebuffer.pixel_format;
+ const auto pixel_format{
+ VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
+ const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
+ gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel);
+
GLint internal_format;
switch (framebuffer.pixel_format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8:
internal_format = GL_RGBA8;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
- gl_framebuffer_data.resize(texture.width * texture.height * 4);
+ break;
+ case Tegra::FramebufferConfig::PixelFormat::RGB565:
+ internal_format = GL_RGB565;
+ texture.gl_format = GL_RGB;
+ texture.gl_type = GL_UNSIGNED_SHORT_5_6_5;
break;
default:
internal_format = GL_RGBA8;
texture.gl_format = GL_RGBA;
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
- gl_framebuffer_data.resize(texture.width * texture.height * 4);
- LOG_CRITICAL(Render_OpenGL, "Unknown framebuffer pixel format: {}",
- static_cast<u32>(framebuffer.pixel_format));
- UNREACHABLE();
+ UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
+ static_cast<u32>(framebuffer.pixel_format));
}
texture.resource.Release();
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 4aebf2321..9bd086368 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -43,14 +43,13 @@ struct ScreenInfo {
TextureInfo texture;
};
-class RendererOpenGL : public VideoCore::RendererBase {
+class RendererOpenGL final : public VideoCore::RendererBase {
public:
explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system);
~RendererOpenGL() override;
/// Swap buffers (render frame)
- void SwapBuffers(
- std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override;
+ void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
/// Initialize the renderer
bool Init() override;
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 24a591797..a35b45c9c 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1072,6 +1072,26 @@ private:
return {};
}
+ Id BallotThread(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id VoteAll(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id VoteAny(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
+ Id VoteEqual(Operation) {
+ UNIMPLEMENTED();
+ return {};
+ }
+
Id DeclareBuiltIn(spv::BuiltIn builtin, spv::StorageClass storage, Id type,
const std::string& name) {
const Id id = OpVariable(type, storage);
@@ -1364,6 +1384,11 @@ private:
&SPIRVDecompiler::WorkGroupId<0>,
&SPIRVDecompiler::WorkGroupId<1>,
&SPIRVDecompiler::WorkGroupId<2>,
+
+ &SPIRVDecompiler::BallotThread,
+ &SPIRVDecompiler::VoteAll,
+ &SPIRVDecompiler::VoteAny,
+ &SPIRVDecompiler::VoteEqual,
};
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index b547d8323..47a9fd961 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -176,6 +176,7 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
{OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
{OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
{OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
+ {OpCode::Type::Warp, &ShaderIR::DecodeWarp},
{OpCode::Type::Memory, &ShaderIR::DecodeMemory},
{OpCode::Type::Texture, &ShaderIR::DecodeTexture},
{OpCode::Type::Image, &ShaderIR::DecodeImage},
diff --git a/src/video_core/shader/decode/conversion.cpp b/src/video_core/shader/decode/conversion.cpp
index 8973fbefa..32facd6ba 100644
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@@ -14,6 +14,12 @@ using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
using Tegra::Shader::Register;
+namespace {
+constexpr OperationCode GetFloatSelector(u64 selector) {
+ return selector == 0 ? OperationCode::FCastHalf0 : OperationCode::FCastHalf1;
+}
+} // Anonymous namespace
+
u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
@@ -22,7 +28,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
case OpCode::Id::I2I_R:
case OpCode::Id::I2I_C:
case OpCode::Id::I2I_IMM: {
- UNIMPLEMENTED_IF(instr.conversion.selector);
+ UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word);
UNIMPLEMENTED_IF(instr.alu.saturate_d);
@@ -57,8 +63,8 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
case OpCode::Id::I2F_R:
case OpCode::Id::I2F_C:
case OpCode::Id::I2F_IMM: {
+ UNIMPLEMENTED_IF(instr.conversion.int_src.selector != 0);
UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
- UNIMPLEMENTED_IF(instr.conversion.selector);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in I2F is not implemented");
@@ -113,8 +119,10 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
}();
if (instr.conversion.src_size == Register::Size::Short) {
- // TODO: figure where extract is sey in the encoding
- value = Operation(OperationCode::FCastHalf0, PRECISE, value);
+ value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
+ std::move(value));
+ } else {
+ ASSERT(instr.conversion.float_src.selector == 0);
}
value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
@@ -169,8 +177,10 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
}();
if (instr.conversion.src_size == Register::Size::Short) {
- // TODO: figure where extract is sey in the encoding
- value = Operation(OperationCode::FCastHalf0, PRECISE, value);
+ value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
+ std::move(value));
+ } else {
+ ASSERT(instr.conversion.float_src.selector == 0);
}
value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
diff --git a/src/video_core/shader/decode/float_set.cpp b/src/video_core/shader/decode/float_set.cpp
index f5013e44a..5614e8a0d 100644
--- a/src/video_core/shader/decode/float_set.cpp
+++ b/src/video_core/shader/decode/float_set.cpp
@@ -15,7 +15,6 @@ using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0,
instr.fset.neg_a != 0);
diff --git a/src/video_core/shader/decode/float_set_predicate.cpp b/src/video_core/shader/decode/float_set_predicate.cpp
index 2323052b0..200c2c983 100644
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ b/src/video_core/shader/decode/float_set_predicate.cpp
@@ -16,10 +16,9 @@ using Tegra::Shader::Pred;
u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
- const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
- instr.fsetp.neg_a != 0);
+ Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
+ instr.fsetp.neg_a != 0);
Node op_b = [&]() {
if (instr.is_b_imm) {
return GetImmediate19(instr);
@@ -29,12 +28,13 @@ u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
}
}();
- op_b = GetOperandAbsNegFloat(op_b, instr.fsetp.abs_b, false);
+ op_b = GetOperandAbsNegFloat(std::move(op_b), instr.fsetp.abs_b, instr.fsetp.neg_b);
// We can't use the constant predicate as destination.
ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
- const Node predicate = GetPredicateComparisonFloat(instr.fsetp.cond, op_a, op_b);
+ const Node predicate =
+ GetPredicateComparisonFloat(instr.fsetp.cond, std::move(op_a), std::move(op_b));
const Node second_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index afea33e5f..840694527 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -42,9 +42,8 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
cond = instr.hsetp2.reg.cond;
h_and = instr.hsetp2.reg.h_and;
op_b =
- UnpackHalfFloat(GetOperandAbsNegHalf(GetRegister(instr.gpr20), instr.hsetp2.reg.abs_b,
- instr.hsetp2.reg.negate_b),
- instr.hsetp2.reg.type_b);
+ GetOperandAbsNegHalf(UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b),
+ instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b);
break;
default:
UNREACHABLE();
@@ -52,22 +51,22 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
}
const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
- const Node combined_pred = GetPredicate(instr.hsetp2.pred3, instr.hsetp2.neg_pred);
+ const Node combined_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred);
const auto Write = [&](u64 dest, Node src) {
SetPredicate(bb, dest, Operation(combiner, std::move(src), combined_pred));
};
const Node comparison = GetPredicateComparisonHalf(cond, op_a, op_b);
- const u64 first = instr.hsetp2.pred0;
- const u64 second = instr.hsetp2.pred39;
+ const u64 first = instr.hsetp2.pred3;
+ const u64 second = instr.hsetp2.pred0;
if (h_and) {
- const Node joined = Operation(OperationCode::LogicalAnd2, comparison);
+ Node joined = Operation(OperationCode::LogicalAnd2, comparison);
Write(first, joined);
- Write(second, Operation(OperationCode::LogicalNegate, joined));
+ Write(second, Operation(OperationCode::LogicalNegate, std::move(joined)));
} else {
- Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0u)));
- Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1u)));
+ Write(first, Operation(OperationCode::LogicalPick2, comparison, Immediate(0U)));
+ Write(second, Operation(OperationCode::LogicalPick2, comparison, Immediate(1U)));
}
return pc;
diff --git a/src/video_core/shader/decode/integer_set.cpp b/src/video_core/shader/decode/integer_set.cpp
index 46e3d5905..59809bcd8 100644
--- a/src/video_core/shader/decode/integer_set.cpp
+++ b/src/video_core/shader/decode/integer_set.cpp
@@ -14,7 +14,6 @@ using Tegra::Shader::OpCode;
u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
const Node op_a = GetRegister(instr.gpr8);
const Node op_b = [&]() {
diff --git a/src/video_core/shader/decode/integer_set_predicate.cpp b/src/video_core/shader/decode/integer_set_predicate.cpp
index dd20775d7..25e48fef8 100644
--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ b/src/video_core/shader/decode/integer_set_predicate.cpp
@@ -16,7 +16,6 @@ using Tegra::Shader::Pred;
u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
const Node op_a = GetRegister(instr.gpr8);
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index ac0e764d6..d46e0f823 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -74,6 +74,13 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
case SystemVariable::InvocationInfo:
LOG_WARNING(HW_GPU, "MOV_SYS instruction with InvocationInfo is incomplete");
return Immediate(0u);
+ case SystemVariable::Tid: {
+ Node value = Immediate(0);
+ value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdX), 0, 9);
+ value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdY), 16, 9);
+ value = BitfieldInsert(value, Operation(OperationCode::LocalInvocationIdZ), 26, 5);
+ return value;
+ }
case SystemVariable::TidX:
return Operation(OperationCode::LocalInvocationIdX);
case SystemVariable::TidY:
diff --git a/src/video_core/shader/decode/predicate_set_register.cpp b/src/video_core/shader/decode/predicate_set_register.cpp
index febbfeb50..84dbc50fe 100644
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ b/src/video_core/shader/decode/predicate_set_register.cpp
@@ -15,7 +15,6 @@ using Tegra::Shader::OpCode;
u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
- const auto opcode = OpCode::Decode(instr);
UNIMPLEMENTED_IF_MSG(instr.generates_cc,
"Condition codes generation in PSET is not implemented");
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
new file mode 100644
index 000000000..04ca74f46
--- /dev/null
+++ b/src/video_core/shader/decode/warp.cpp
@@ -0,0 +1,55 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/node_helper.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::Instruction;
+using Tegra::Shader::OpCode;
+using Tegra::Shader::Pred;
+using Tegra::Shader::VoteOperation;
+
+namespace {
+OperationCode GetOperationCode(VoteOperation vote_op) {
+ switch (vote_op) {
+ case VoteOperation::All:
+ return OperationCode::VoteAll;
+ case VoteOperation::Any:
+ return OperationCode::VoteAny;
+ case VoteOperation::Eq:
+ return OperationCode::VoteEqual;
+ default:
+ UNREACHABLE_MSG("Invalid vote operation={}", static_cast<u64>(vote_op));
+ return OperationCode::VoteAll;
+ }
+}
+} // Anonymous namespace
+
+u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
+ const Instruction instr = {program_code[pc]};
+ const auto opcode = OpCode::Decode(instr);
+
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::VOTE: {
+ const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
+ const Node active = Operation(OperationCode::BallotThread, value);
+ const Node vote = Operation(GetOperationCode(instr.vote.operation), value);
+ SetRegister(bb, instr.gpr0, active);
+ SetPredicate(bb, instr.vote.dest_pred, vote);
+ break;
+ }
+ default:
+ UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
+ break;
+ }
+
+ return pc;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 5f0852364..5db9313c4 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -168,6 +168,11 @@ enum class OperationCode {
WorkGroupIdY, /// () -> uint
WorkGroupIdZ, /// () -> uint
+ BallotThread, /// (bool) -> uint
+ VoteAll, /// (bool) -> bool
+ VoteAny, /// (bool) -> bool
+ VoteEqual, /// (bool) -> bool
+
Amount,
};
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 5e91fe129..1e5c7f660 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -405,4 +405,9 @@ Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
Immediate(offset), Immediate(bits));
}
+Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) { // Builds a UBitfieldInsert operation node from base and insert
+ return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert, Immediate(offset), // offset is wrapped as an immediate node
+ Immediate(bits)); // bits is wrapped as an immediate node
+}
+
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 59a083d90..bcc9b79b6 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -167,6 +167,7 @@ private:
u32 DecodeFfma(NodeBlock& bb, u32 pc);
u32 DecodeHfma2(NodeBlock& bb, u32 pc);
u32 DecodeConversion(NodeBlock& bb, u32 pc);
+ u32 DecodeWarp(NodeBlock& bb, u32 pc);
u32 DecodeMemory(NodeBlock& bb, u32 pc);
u32 DecodeTexture(NodeBlock& bb, u32 pc);
u32 DecodeImage(NodeBlock& bb, u32 pc);
@@ -279,6 +280,9 @@ private:
/// Extracts a sequence of bits from a node
Node BitfieldExtract(Node value, u32 offset, u32 bits);
+ /// Inserts a sequence of bits from one node into another
+ Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits);
+
void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
const Node4& components);
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index c50f6354d..4ceb219be 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -445,11 +445,12 @@ PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat
switch (format) {
case Tegra::FramebufferConfig::PixelFormat::ABGR8:
return PixelFormat::ABGR8U;
+ case Tegra::FramebufferConfig::PixelFormat::RGB565:
+ return PixelFormat::B5G6R5U;
case Tegra::FramebufferConfig::PixelFormat::BGRA8:
return PixelFormat::BGRA8;
default:
- LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
- UNREACHABLE();
+ UNIMPLEMENTED_MSG("Unimplemented format={}", static_cast<u32>(format));
return PixelFormat::ABGR8U;
}
}
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index 358d6757c..e7ef66ee2 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -58,7 +58,6 @@ public:
std::size_t GetHostSizeInBytes() const {
std::size_t host_size_in_bytes;
if (GetCompressionType() == SurfaceCompression::Converted) {
- constexpr std::size_t rgb8_bpp = 4ULL;
// ASTC is uncompressed in software, in emulated as RGBA8
host_size_in_bytes = 0;
for (u32 level = 0; level < num_levels; ++level) {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index a3a3770a7..2ec0203d1 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -308,8 +308,6 @@ protected:
if (!guard_render_targets && surface->IsRenderTarget()) {
ManageRenderTargetUnregister(surface);
}
- const GPUVAddr gpu_addr = surface->GetGpuAddr();
- const CacheAddr cache_ptr = surface->GetCacheAddr();
const std::size_t size = surface->GetSizeInBytes();
const VAddr cpu_addr = surface->GetCpuAddr();
rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 7e8295944..7df5f1452 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -257,19 +257,21 @@ std::vector<u8> UnswizzleTexture(u8* address, u32 tile_size_x, u32 tile_size_y,
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data,
- u32 block_height_bit) {
+ u32 block_height_bit, u32 offset_x, u32 offset_y) {
const u32 block_height = 1U << block_height_bit;
const u32 image_width_in_gobs{(swizzled_width * bytes_per_pixel + (gob_size_x - 1)) /
gob_size_x};
for (u32 line = 0; line < subrect_height; ++line) {
+ const u32 dst_y = line + offset_y;
const u32 gob_address_y =
- (line / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs +
- ((line % (gob_size_y * block_height)) / gob_size_y) * gob_size;
- const auto& table = legacy_swizzle_table[line % gob_size_y];
+ (dst_y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs +
+ ((dst_y % (gob_size_y * block_height)) / gob_size_y) * gob_size;
+ const auto& table = legacy_swizzle_table[dst_y % gob_size_y];
for (u32 x = 0; x < subrect_width; ++x) {
+ const u32 dst_x = x + offset_x;
const u32 gob_address =
- gob_address_y + (x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
- const u32 swizzled_offset = gob_address + table[(x * bytes_per_pixel) % gob_size_x];
+ gob_address_y + (dst_x * bytes_per_pixel / gob_size_x) * gob_size * block_height;
+ const u32 swizzled_offset = gob_address + table[(dst_x * bytes_per_pixel) % gob_size_x];
u8* source_line = unswizzled_data + line * source_pitch + x * bytes_per_pixel;
u8* dest_addr = swizzled_data + swizzled_offset;
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index eaec9b5a5..f1e3952bc 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -44,7 +44,8 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height
/// Copies an untiled subrectangle into a tiled surface.
void SwizzleSubrect(u32 subrect_width, u32 subrect_height, u32 source_pitch, u32 swizzled_width,
- u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height);
+ u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height,
+ u32 offset_x, u32 offset_y);
/// Copies a tiled subrectangle into a linear surface.
void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 swizzled_width,
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index e3be018b9..e36bc2c04 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -213,7 +213,7 @@ struct TICEntry {
if (header_version != TICHeaderVersion::OneDBuffer) {
return width_minus_1 + 1;
}
- return (buffer_high_width_minus_one << 16) | buffer_low_width_minus_one;
+ return ((buffer_high_width_minus_one << 16) | buffer_low_width_minus_one) + 1;
}
u32 Height() const {