diff options
Diffstat (limited to 'src/video_core')
26 files changed, 418 insertions, 155 deletions
diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 713c14182..0b77afc71 100644 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -12,7 +12,7 @@ namespace Tegra { -DmaPusher::DmaPusher(GPU& gpu) : gpu(gpu) {} +DmaPusher::DmaPusher(Core::System& system, GPU& gpu) : gpu{gpu}, system{system} {} DmaPusher::~DmaPusher() = default; @@ -26,7 +26,7 @@ void DmaPusher::DispatchCalls() { dma_pushbuffer_subindex = 0; - while (Core::System::GetInstance().IsPoweredOn()) { + while (system.IsPoweredOn()) { if (!Step()) { break; } diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 6ab06518f..d6188614a 100644 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -10,6 +10,10 @@ #include "common/bit_field.h" #include "common/common_types.h" +namespace Core { +class System; +} + namespace Tegra { enum class SubmissionMode : u32 { @@ -56,7 +60,7 @@ using CommandList = std::vector<Tegra::CommandListHeader>; */ class DmaPusher { public: - explicit DmaPusher(GPU& gpu); + explicit DmaPusher(Core::System& system, GPU& gpu); ~DmaPusher(); void Push(CommandList&& entries) { @@ -72,8 +76,6 @@ private: void CallMethod(u32 argument) const; - GPU& gpu; - std::vector<CommandHeader> command_headers; ///< Buffer for list of commands fetched at once std::queue<CommandList> dma_pushbuffer; ///< Queue of command lists to be processed @@ -92,6 +94,9 @@ private: GPUVAddr dma_mget{}; ///< main pushbuffer last read address bool ib_enable{true}; ///< IB mode enabled + + GPU& gpu; + Core::System& system; }; } // namespace Tegra diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 5e9cfba22..7231597d4 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1507,7 +1507,7 @@ union Instruction { TextureType GetTextureType() const { // The TLDS instruction has a weird encoding for the texture type. - if (texture_info >= 0 && texture_info <= 1) { + if (texture_info <= 1) { return TextureType::Texture1D; } if (texture_info == 2 || texture_info == 8 || texture_info == 12 || diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 8acf2eda2..a606f4abd 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -27,7 +27,7 @@ GPU::GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& render : system{system}, renderer{std::move(renderer_)}, is_async{is_async} { auto& rasterizer{renderer->Rasterizer()}; memory_manager = std::make_unique<Tegra::MemoryManager>(system, rasterizer); - dma_pusher = std::make_unique<Tegra::DmaPusher>(*this); + dma_pusher = std::make_unique<Tegra::DmaPusher>(system, *this); maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, rasterizer, *memory_manager); fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer); kepler_compute = std::make_unique<Engines::KeplerCompute>(system, rasterizer, *memory_manager); diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index a3389d0d2..fd49bc2a9 100644 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -6,8 +6,8 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" +#include "core/hle/kernel/memory/page_table.h" #include "core/hle/kernel/process.h" -#include "core/hle/kernel/vm_manager.h" #include "core/memory.h" #include "video_core/gpu.h" #include "video_core/memory_manager.h" @@ -17,10 +17,7 @@ namespace Tegra { MemoryManager::MemoryManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer}, system{system} { - std::fill(page_table.pointers.begin(), page_table.pointers.end(), nullptr); - std::fill(page_table.attributes.begin(), page_table.attributes.end(), - Common::PageType::Unmapped); - page_table.Resize(address_space_width); + page_table.Resize(address_space_width, page_bits, false); // Initialize the map with a single free region covering the entire managed space. VirtualMemoryArea initial_vma; @@ -55,9 +52,9 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, u64 size) { MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr); ASSERT(system.CurrentProcess() - ->VMManager() - .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped, - Kernel::MemoryAttribute::DeviceMapped) + ->PageTable() + .SetMemoryAttribute(cpu_addr, size, Kernel::Memory::MemoryAttribute::DeviceShared, + Kernel::Memory::MemoryAttribute::DeviceShared) .IsSuccess()); return gpu_addr; @@ -70,9 +67,9 @@ GPUVAddr MemoryManager::MapBufferEx(VAddr cpu_addr, GPUVAddr gpu_addr, u64 size) MapBackingMemory(gpu_addr, system.Memory().GetPointer(cpu_addr), aligned_size, cpu_addr); ASSERT(system.CurrentProcess() - ->VMManager() - .SetMemoryAttribute(cpu_addr, size, Kernel::MemoryAttribute::DeviceMapped, - Kernel::MemoryAttribute::DeviceMapped) + ->PageTable() + .SetMemoryAttribute(cpu_addr, size, Kernel::Memory::MemoryAttribute::DeviceShared, + Kernel::Memory::MemoryAttribute::DeviceShared) .IsSuccess()); return gpu_addr; } @@ -89,9 +86,10 @@ GPUVAddr MemoryManager::UnmapBuffer(GPUVAddr gpu_addr, u64 size) { UnmapRange(gpu_addr, aligned_size); ASSERT(system.CurrentProcess() - ->VMManager() - .SetMemoryAttribute(cpu_addr.value(), size, Kernel::MemoryAttribute::DeviceMapped, - Kernel::MemoryAttribute::None) + ->PageTable() + .SetMemoryAttribute(cpu_addr.value(), size, + Kernel::Memory::MemoryAttribute::DeviceShared, + Kernel::Memory::MemoryAttribute::None) .IsSuccess()); return gpu_addr; @@ -147,16 +145,8 @@ T MemoryManager::Read(GPUVAddr addr) const { return value; } - switch (page_table.attributes[addr >> page_bits]) { - case Common::PageType::Unmapped: - LOG_ERROR(HW_GPU, "Unmapped Read{} @ 0x{:08X}", sizeof(T) * 8, addr); - return 0; - case Common::PageType::Memory: - ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr); - break; - default: - UNREACHABLE(); - } + UNREACHABLE(); + return {}; } @@ -173,17 +163,7 @@ void MemoryManager::Write(GPUVAddr addr, T data) { return; } - switch (page_table.attributes[addr >> page_bits]) { - case Common::PageType::Unmapped: - LOG_ERROR(HW_GPU, "Unmapped Write{} 0x{:08X} @ 0x{:016X}", sizeof(data) * 8, - static_cast<u32>(data), addr); - return; - case Common::PageType::Memory: - ASSERT_MSG(false, "Mapped memory page without a pointer @ {:016X}", addr); - break; - default: - UNREACHABLE(); - } + UNREACHABLE(); } template u8 MemoryManager::Read<u8>(GPUVAddr addr) const; @@ -249,18 +229,11 @@ void MemoryManager::ReadBlock(GPUVAddr src_addr, void* dest_buffer, const std::s const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; - switch (page_table.attributes[page_index]) { - case Common::PageType::Memory: { - const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; - // Flush must happen on the rasterizer interface, such that memory is always synchronous - // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. - rasterizer.FlushRegion(src_addr, copy_amount); - memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); - break; - } - default: - UNREACHABLE(); - } + const VAddr src_addr{page_table.backing_addr[page_index] + page_offset}; + // Flush must happen on the rasterizer interface, such that memory is always synchronous + // when it is read (even when in asynchronous GPU mode). Fixes Dead Cells title menu. + rasterizer.FlushRegion(src_addr, copy_amount); + memory.ReadBlockUnsafe(src_addr, dest_buffer, copy_amount); page_index++; page_offset = 0; @@ -305,18 +278,11 @@ void MemoryManager::WriteBlock(GPUVAddr dest_addr, const void* src_buffer, const const std::size_t copy_amount{ std::min(static_cast<std::size_t>(page_size) - page_offset, remaining_size)}; - switch (page_table.attributes[page_index]) { - case Common::PageType::Memory: { - const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; - // Invalidate must happen on the rasterizer interface, such that memory is always - // synchronous when it is written (even when in asynchronous GPU mode). - rasterizer.InvalidateRegion(dest_addr, copy_amount); - memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); - break; - } - default: - UNREACHABLE(); - } + const VAddr dest_addr{page_table.backing_addr[page_index] + page_offset}; + // Invalidate must happen on the rasterizer interface, such that memory is always + // synchronous when it is written (even when in asynchronous GPU mode). + rasterizer.InvalidateRegion(dest_addr, copy_amount); + memory.WriteBlockUnsafe(dest_addr, src_buffer, copy_amount); page_index++; page_offset = 0; @@ -362,8 +328,8 @@ void MemoryManager::CopyBlockUnsafe(GPUVAddr dest_addr, GPUVAddr src_addr, const bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) { const VAddr addr = page_table.backing_addr[gpu_addr >> page_bits]; - const std::size_t page = (addr & Memory::PAGE_MASK) + size; - return page <= Memory::PAGE_SIZE; + const std::size_t page = (addr & Core::Memory::PAGE_MASK) + size; + return page <= Core::Memory::PAGE_SIZE; } void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageType type, @@ -375,12 +341,13 @@ void MemoryManager::MapPages(GPUVAddr base, u64 size, u8* memory, Common::PageTy ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", base + page_table.pointers.size()); - std::fill(page_table.attributes.begin() + base, page_table.attributes.begin() + end, type); - if (memory == nullptr) { - std::fill(page_table.pointers.begin() + base, page_table.pointers.begin() + end, memory); - std::fill(page_table.backing_addr.begin() + base, page_table.backing_addr.begin() + end, - backing_addr); + while (base != end) { + page_table.pointers[base] = nullptr; + page_table.backing_addr[base] = 0; + + base += 1; + } } else { while (base != end) { page_table.pointers[base] = memory; diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 0d9468535..0ddd52d5a 100644 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -179,7 +179,7 @@ private: /// End of address space, based on address space in bits. static constexpr GPUVAddr address_space_end{1ULL << address_space_width}; - Common::BackingPageTable page_table{page_bits}; + Common::PageTable page_table; VMAMap vma_map; VideoCore::RasterizerInterface& rasterizer; diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index d01db97da..53622ca05 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -23,15 +23,15 @@ constexpr auto RangeFromInterval(Map& map, const Interval& interval) { } // Anonymous namespace -RasterizerAccelerated::RasterizerAccelerated(Memory::Memory& cpu_memory_) +RasterizerAccelerated::RasterizerAccelerated(Core::Memory::Memory& cpu_memory_) : cpu_memory{cpu_memory_} {} RasterizerAccelerated::~RasterizerAccelerated() = default; void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { std::lock_guard lock{pages_mutex}; - const u64 page_start{addr >> Memory::PAGE_BITS}; - const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; + const u64 page_start{addr >> Core::Memory::PAGE_BITS}; + const u64 page_end{(addr + size + Core::Memory::PAGE_SIZE - 1) >> Core::Memory::PAGE_BITS}; // Interval maps will erase segments if count reaches 0, so if delta is negative we have to // subtract after iterating @@ -44,8 +44,8 @@ void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int del const auto interval = pair.first & pages_interval; const int count = pair.second; - const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS; - const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS; + const VAddr interval_start_addr = boost::icl::first(interval) << Core::Memory::PAGE_BITS; + const VAddr interval_end_addr = boost::icl::last_next(interval) << Core::Memory::PAGE_BITS; const u64 interval_size = interval_end_addr - interval_start_addr; if (delta > 0 && count == delta) { diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h index 315798e7c..91866d7dd 100644 --- a/src/video_core/rasterizer_accelerated.h +++ b/src/video_core/rasterizer_accelerated.h @@ -11,7 +11,7 @@ #include "common/common_types.h" #include "video_core/rasterizer_interface.h" -namespace Memory { +namespace Core::Memory { class Memory; } @@ -20,7 +20,7 @@ namespace VideoCore { /// Implements the shared part in GPU accelerated rasterizers in RasterizerInterface. class RasterizerAccelerated : public RasterizerInterface { public: - explicit RasterizerAccelerated(Memory::Memory& cpu_memory_); + explicit RasterizerAccelerated(Core::Memory::Memory& cpu_memory_); ~RasterizerAccelerated() override; void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; @@ -30,7 +30,7 @@ private: CachedPageMap cached_pages; std::mutex pages_mutex; - Memory::Memory& cpu_memory; + Core::Memory::Memory& cpu_memory; }; } // namespace VideoCore diff --git a/src/video_core/renderer_opengl/gl_query_cache.cpp b/src/video_core/renderer_opengl/gl_query_cache.cpp index f12e9f55f..d7ba57aca 100644 --- a/src/video_core/renderer_opengl/gl_query_cache.cpp +++ b/src/video_core/renderer_opengl/gl_query_cache.cpp @@ -94,9 +94,9 @@ CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {} CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept { - VideoCommon::CachedQueryBase<HostCounter>::operator=(std::move(rhs)); cache = rhs.cache; type = rhs.type; + CachedQueryBase<HostCounter>::operator=(std::move(rhs)); return *this; } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 12c6dcfde..6d2ff20f9 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -34,8 +34,6 @@ namespace OpenGL { using Tegra::Engines::ShaderType; -using VideoCommon::Shader::CompileDepth; -using VideoCommon::Shader::CompilerSettings; using VideoCommon::Shader::ProgramCode; using VideoCommon::Shader::Registry; using VideoCommon::Shader::ShaderIR; @@ -45,7 +43,7 @@ namespace { constexpr u32 STAGE_MAIN_OFFSET = 10; constexpr u32 KERNEL_MAIN_OFFSET = 0; -constexpr CompilerSettings COMPILER_SETTINGS{CompileDepth::FullDecompile}; +constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{}; /// Gets the address for the specified shader stage program GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index b1804e9ea..22242cce9 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -835,7 +835,8 @@ private: void DeclareConstantBuffers() { u32 binding = device.GetBaseBindings(stage).uniform_buffer; - for (const auto& [index, cbuf] : ir.GetConstantBuffers()) { + for (const auto& buffers : ir.GetConstantBuffers()) { + const auto index = buffers.first; code.AddLine("layout (std140, binding = {}) uniform {} {{", binding++, GetConstBufferBlock(index)); code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); @@ -1144,6 +1145,7 @@ private: return {"gl_FragCoord"s + GetSwizzle(element), Type::Float}; default: UNREACHABLE(); + return {"0", Type::Int}; } case Attribute::Index::FrontColor: return {"gl_Color"s + GetSwizzle(element), Type::Float}; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 89f0e04ef..2c0c77c28 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -191,6 +191,7 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, case Tegra::Texture::TextureMipmapFilter::Linear: return GL_LINEAR_MIPMAP_LINEAR; } + break; } case Tegra::Texture::TextureFilter::Nearest: { switch (mip_filter_mode) { @@ -201,6 +202,7 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode, case Tegra::Texture::TextureMipmapFilter::Linear: return GL_NEAREST_MIPMAP_LINEAR; } + break; } } LOG_ERROR(Render_OpenGL, "Unimplemented texture filter mode={}", static_cast<u32>(filter_mode)); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index dd590c38b..04532f8f8 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -42,7 +42,7 @@ #include <vulkan/vulkan_win32.h> #endif -#ifdef __linux__ +#if !defined(_WIN32) && !defined(__APPLE__) #include <X11/Xlib.h> #include <vulkan/vulkan_wayland.h> #include <vulkan/vulkan_xlib.h> @@ -119,7 +119,7 @@ vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatc extensions.push_back(VK_KHR_WIN32_SURFACE_EXTENSION_NAME); break; #endif -#ifdef __linux__ +#if !defined(_WIN32) && !defined(__APPLE__) case Core::Frontend::WindowSystemType::X11: extensions.push_back(VK_KHR_XLIB_SURFACE_EXTENSION_NAME); break; @@ -345,7 +345,7 @@ bool RendererVulkan::CreateSurface() { } } #endif -#ifdef __linux__ +#if !defined(_WIN32) && !defined(__APPLE__) if (window_info.type == Core::Frontend::WindowSystemType::X11) { const VkXlibSurfaceCreateInfoKHR xlib_ci{ VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, nullptr, 0, diff --git a/src/video_core/renderer_vulkan/shaders/quad_indexed.comp b/src/video_core/renderer_vulkan/shaders/quad_indexed.comp new file mode 100644 index 000000000..5a472ba9b --- /dev/null +++ b/src/video_core/renderer_vulkan/shaders/quad_indexed.comp @@ -0,0 +1,50 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +/* + * Build instructions: + * $ glslangValidator -V quad_indexed.comp -o output.spv + * $ spirv-opt -O --strip-debug output.spv -o optimized.spv + * $ xxd -i optimized.spv + * + * Then copy that bytecode to the C++ file + */ + +#version 460 core + +layout (local_size_x = 1024) in; + +layout (std430, set = 0, binding = 0) readonly buffer InputBuffer { + uint input_indexes[]; +}; + +layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer { + uint output_indexes[]; +}; + +layout (push_constant) uniform PushConstants { + uint base_vertex; + int index_shift; // 0: uint8, 1: uint16, 2: uint32 +}; + +void main() { + int primitive = int(gl_GlobalInvocationID.x); + if (primitive * 6 >= output_indexes.length()) { + return; + } + + int index_size = 8 << index_shift; + int flipped_shift = 2 - index_shift; + int mask = (1 << flipped_shift) - 1; + + const int quad_swizzle[6] = int[](0, 1, 2, 0, 2, 3); + for (uint vertex = 0; vertex < 6; ++vertex) { + int offset = primitive * 4 + quad_swizzle[vertex]; + int int_offset = offset >> flipped_shift; + int bit_offset = (offset & mask) * index_size; + uint packed_input = input_indexes[int_offset]; + uint index = bitfieldExtract(packed_input, bit_offset, index_size); + output_indexes[primitive * 6 + vertex] = index + base_vertex; + } +} diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index 878a78755..7b0268033 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -135,11 +135,11 @@ VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEnt return entry; } -VkPushConstantRange BuildQuadArrayPassPushConstantRange() { +VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { VkPushConstantRange range; range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; range.offset = 0; - range.size = sizeof(u32); + range.size = static_cast<u32>(size); return range; } @@ -220,7 +220,130 @@ constexpr u8 uint8_pass[] = { 0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; -std::array<VkDescriptorSetLayoutBinding, 2> BuildUint8PassDescriptorSetBindings() { +// Quad indexed SPIR-V module. Generated from the "shaders/" directory. +constexpr u8 QUAD_INDEXED_SPV[] = { + 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x7c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, + 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, + 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x72, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, + 0x3b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x02, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x2b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x3b, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x3f, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x41, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x43, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x09, 0x00, 0x41, 0x00, 0x00, 0x00, + 0x44, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, + 0x42, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x46, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, + 0x56, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, + 0x56, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x57, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x5b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, + 0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, + 0x70, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x46, 0x00, 0x00, 0x00, + 0x47, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, + 0xf8, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x73, 0x00, 0x00, 0x00, + 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, + 0xf8, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x0f, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x44, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0xaf, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, + 0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, + 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, + 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, + 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x28, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, + 0x2b, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, + 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, + 0xf5, 0x00, 0x07, 0x00, 0x09, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, + 0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, + 0x1b, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, + 0xf6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xfa, 0x00, 0x04, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, + 0xf8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, + 0x47, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, + 0xc3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, + 0x2e, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, + 0x4a, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x54, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, + 0x5b, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, + 0x4e, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x5d, 0x00, 0x00, 0x00, + 0x5c, 0x00, 0x00, 0x00, 0xcb, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, + 0x5d, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x42, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, + 0x6a, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, + 0x62, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5b, 0x00, 0x00, 0x00, + 0x6d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, + 0x3e, 0x00, 0x03, 0x00, 0x6d, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, + 0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00, + 0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00, + 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, + 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; + +std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { std::array<VkDescriptorSetLayoutBinding, 2> bindings; bindings[0].binding = 0; bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; @@ -235,7 +358,7 @@ std::array<VkDescriptorSetLayoutBinding, 2> BuildUint8PassDescriptorSetBindings( return bindings; } -VkDescriptorUpdateTemplateEntryKHR BuildUint8PassDescriptorUpdateTemplateEntry() { +VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { VkDescriptorUpdateTemplateEntryKHR entry; entry.dstBinding = 0; entry.dstArrayElement = 0; @@ -337,14 +460,14 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler, VKUpdateDescriptorQueue& update_descriptor_queue) : VKComputePass(device, descriptor_pool, BuildQuadArrayPassDescriptorSetLayoutBinding(), BuildQuadArrayPassDescriptorUpdateTemplateEntry(), - BuildQuadArrayPassPushConstantRange(), std::size(quad_array), quad_array), + BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array), scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, update_descriptor_queue{update_descriptor_queue} {} QuadArrayPass::~QuadArrayPass() = default; std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { - const u32 num_triangle_vertices = num_vertices * 6 / 4; + const u32 num_triangle_vertices = (num_vertices / 4) * 6; const std::size_t staging_size = num_triangle_vertices * sizeof(u32); auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); @@ -383,8 +506,8 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler, VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool, VKUpdateDescriptorQueue& update_descriptor_queue) - : VKComputePass(device, descriptor_pool, BuildUint8PassDescriptorSetBindings(), - BuildUint8PassDescriptorUpdateTemplateEntry(), {}, std::size(uint8_pass), + : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), + BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass), uint8_pass), scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, update_descriptor_queue{update_descriptor_queue} {} @@ -425,4 +548,70 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff return {*buffer.handle, 0}; } +QuadIndexedPass::QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler, + VKDescriptorPool& descriptor_pool, + VKStagingBufferPool& staging_buffer_pool, + VKUpdateDescriptorQueue& update_descriptor_queue) + : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), + BuildInputOutputDescriptorUpdateTemplate(), + BuildComputePushConstantRange(sizeof(u32) * 2), std::size(QUAD_INDEXED_SPV), + QUAD_INDEXED_SPV), + scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, + update_descriptor_queue{update_descriptor_queue} {} + +QuadIndexedPass::~QuadIndexedPass() = default; + +std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( + Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, + VkBuffer src_buffer, u64 src_offset) { + const u32 index_shift = [index_format] { + switch (index_format) { + case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte: + return 0; + case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedShort: + return 1; + case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedInt: + return 2; + } + UNREACHABLE(); + return 2; + }(); + const u32 input_size = num_vertices << index_shift; + const u32 num_tri_vertices = (num_vertices / 4) * 6; + + const std::size_t staging_size = num_tri_vertices * sizeof(u32); + auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); + + update_descriptor_queue.Acquire(); + update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); + update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size); + const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); + + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set, + num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { + static constexpr u32 dispatch_size = 1024; + const std::array push_constants = {base_vertex, index_shift}; + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), + &push_constants); + cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1); + + VkBufferMemoryBarrier barrier; + barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; + barrier.pNext = nullptr; + barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.buffer = buffer; + barrier.offset = 0; + barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32)); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); + }); + return {*buffer.handle, 0}; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.h b/src/video_core/renderer_vulkan/vk_compute_pass.h index ec80c8683..26bf834de 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.h +++ b/src/video_core/renderer_vulkan/vk_compute_pass.h @@ -8,6 +8,7 @@ #include <utility> #include <vector> #include "common/common_types.h" +#include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/wrapper.h" @@ -73,4 +74,22 @@ private: VKUpdateDescriptorQueue& update_descriptor_queue; }; +class QuadIndexedPass final : public VKComputePass { +public: + explicit QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler, + VKDescriptorPool& descriptor_pool, + VKStagingBufferPool& staging_buffer_pool, + VKUpdateDescriptorQueue& update_descriptor_queue); + ~QuadIndexedPass(); + + std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, + u32 num_vertices, u32 base_vertex, VkBuffer src_buffer, + u64 src_offset); + +private: + VKScheduler& scheduler; + VKStagingBufferPool& staging_buffer_pool; + VKUpdateDescriptorQueue& update_descriptor_queue; +}; + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 4ca0febb8..9bf9e1028 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -292,6 +292,7 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind staging_pool(device, memory_manager, scheduler), descriptor_pool(device), update_descriptor_queue(device, scheduler), renderpass_cache(device), quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), + quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, staging_pool), @@ -844,18 +845,26 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar bool is_indexed) { const auto& regs = system.GPU().Maxwell3D().regs; switch (regs.draw.topology) { - case Maxwell::PrimitiveTopology::Quads: - if (params.is_indexed) { - UNIMPLEMENTED(); - } else { + case Maxwell::PrimitiveTopology::Quads: { + if (!params.is_indexed) { const auto [buffer, offset] = quad_array_pass.Assemble(params.num_vertices, params.base_vertex); buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); params.base_vertex = 0; params.num_vertices = params.num_vertices * 6 / 4; params.is_indexed = true; + break; } + const GPUVAddr gpu_addr = regs.index_array.IndexStart(); + auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); + std::tie(buffer, offset) = quad_indexed_pass.Assemble( + regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); + + buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); + params.num_vertices = (params.num_vertices / 4) * 6; + params.base_vertex = 0; break; + } default: { if (!is_indexed) { break; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 46037860a..d9108f862 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -254,6 +254,7 @@ private: VKUpdateDescriptorQueue update_descriptor_queue; VKRenderPassCache renderpass_cache; QuadArrayPass quad_array_pass; + QuadIndexedPass quad_indexed_pass; Uint8Pass uint8_pass; VKTextureCache texture_cache; diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 38a93a01a..868447af2 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include <algorithm> +#include <limits> #include <optional> #include <tuple> #include <vector> @@ -22,22 +23,38 @@ namespace { constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; -constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024; +constexpr u64 PREFERRED_STREAM_BUFFER_SIZE = 256 * 1024 * 1024; -std::optional<u32> FindMemoryType(const VKDevice& device, u32 filter, - VkMemoryPropertyFlags wanted) { - const auto properties = device.GetPhysical().GetMemoryProperties(); - for (u32 i = 0; i < properties.memoryTypeCount; i++) { - if (!(filter & (1 << i))) { - continue; - } - if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) { +/// Find a memory type with the passed requirements +std::optional<u32> FindMemoryType(const VkPhysicalDeviceMemoryProperties& properties, + VkMemoryPropertyFlags wanted, + u32 filter = std::numeric_limits<u32>::max()) { + for (u32 i = 0; i < properties.memoryTypeCount; ++i) { + const auto flags = properties.memoryTypes[i].propertyFlags; + if ((flags & wanted) == wanted && (filter & (1U << i)) != 0) { return i; } } return std::nullopt; } +/// Get the preferred host visible memory type. +u32 GetMemoryType(const VkPhysicalDeviceMemoryProperties& properties, + u32 filter = std::numeric_limits<u32>::max()) { + // Prefer device local host visible allocations. Both AMD and Nvidia now provide one. + // Otherwise search for a host visible allocation. + static constexpr auto HOST_MEMORY = + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + static constexpr auto DYNAMIC_MEMORY = HOST_MEMORY | VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + + std::optional preferred_type = FindMemoryType(properties, DYNAMIC_MEMORY); + if (!preferred_type) { + preferred_type = FindMemoryType(properties, HOST_MEMORY); + ASSERT_MSG(preferred_type, "No host visible and coherent memory type found"); + } + return preferred_type.value_or(0); +} + } // Anonymous namespace VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, @@ -51,7 +68,7 @@ VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, VKStreamBuffer::~VKStreamBuffer() = default; std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { - ASSERT(size <= STREAM_BUFFER_SIZE); + ASSERT(size <= stream_buffer_size); mapped_size = size; if (alignment > 0) { @@ -61,7 +78,7 @@ std::tuple<u8*, u64, bool> VKStreamBuffer::Map(u64 size, u64 alignment) { WaitPendingOperations(offset); bool invalidated = false; - if (offset + size > STREAM_BUFFER_SIZE) { + if (offset + size > stream_buffer_size) { // The buffer would overflow, save the amount of used watches and reset the state. invalidation_mark = current_watch_cursor; current_watch_cursor = 0; @@ -98,40 +115,37 @@ void VKStreamBuffer::Unmap(u64 size) { } void VKStreamBuffer::CreateBuffers(VkBufferUsageFlags usage) { + const auto memory_properties = device.GetPhysical().GetMemoryProperties(); + const u32 preferred_type = GetMemoryType(memory_properties); + const u32 preferred_heap = memory_properties.memoryTypes[preferred_type].heapIndex; + + // Substract from the preferred heap size some bytes to avoid getting out of memory. + const VkDeviceSize heap_size = memory_properties.memoryHeaps[preferred_heap].size; + const VkDeviceSize allocable_size = heap_size - 4 * 1024 * 1024; + VkBufferCreateInfo buffer_ci; buffer_ci.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; buffer_ci.pNext = nullptr; buffer_ci.flags = 0; - buffer_ci.size = STREAM_BUFFER_SIZE; + buffer_ci.size = std::min(PREFERRED_STREAM_BUFFER_SIZE, allocable_size); buffer_ci.usage = usage; buffer_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE; buffer_ci.queueFamilyIndexCount = 0; buffer_ci.pQueueFamilyIndices = nullptr; - const auto& dev = device.GetLogical(); - buffer = dev.CreateBuffer(buffer_ci); - - const auto& dld = device.GetDispatchLoader(); - const auto requirements = dev.GetBufferMemoryRequirements(*buffer); - // Prefer device local host visible allocations (this should hit AMD's pinned memory). - auto type = - FindMemoryType(device, requirements.memoryTypeBits, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - if (!type) { - // Otherwise search for a host visible allocation. - type = FindMemoryType(device, requirements.memoryTypeBits, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); - ASSERT_MSG(type, "No host visible and coherent memory type found"); - } + buffer = device.GetLogical().CreateBuffer(buffer_ci); + + const auto requirements = device.GetLogical().GetBufferMemoryRequirements(*buffer); + const u32 required_flags = requirements.memoryTypeBits; + stream_buffer_size = static_cast<u64>(requirements.size); + VkMemoryAllocateInfo memory_ai; memory_ai.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; memory_ai.pNext = nullptr; memory_ai.allocationSize = requirements.size; - memory_ai.memoryTypeIndex = *type; + memory_ai.memoryTypeIndex = GetMemoryType(memory_properties, required_flags); - memory = dev.AllocateMemory(memory_ai); + memory = device.GetLogical().AllocateMemory(memory_ai); buffer.BindMemory(*memory, 0); } diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 58ce8b973..dfddf7ad6 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -56,8 +56,9 @@ private: const VKDevice& device; ///< Vulkan device manager. VKScheduler& scheduler; ///< Command scheduler. - vk::Buffer buffer; ///< Mapped buffer. - vk::DeviceMemory memory; ///< Memory allocation. + vk::Buffer buffer; ///< Mapped buffer. + vk::DeviceMemory memory; ///< Memory allocation. + u64 stream_buffer_size{}; ///< Stream buffer size. u64 offset{}; ///< Buffer iterator. u64 mapped_size{}; ///< Size reserved for the current copy. diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index 0dd7a1196..85ee9aa5e 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -352,8 +352,10 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value())); } else { const Node image_register = GetRegister(instr.gpr39); - const auto [base_image, buffer, offset] = TrackCbuf( - image_register, global_code, static_cast<s64>(global_code.size())); + const auto result = TrackCbuf(image_register, global_code, + static_cast<s64>(global_code.size())); + const auto buffer = std::get<1>(result); + const auto offset = std::get<2>(result); descriptor = registry.ObtainBindlessSampler(buffer, offset); } if (!descriptor) { @@ -497,9 +499,12 @@ Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType t Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) { const Node image_register = GetRegister(reg); - const auto [base_image, buffer, offset] = + const auto result = TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size())); + const auto buffer = std::get<1>(result); + const auto offset = std::get<2>(result); + const auto it = std::find_if(std::begin(used_images), std::end(used_images), [buffer = buffer, offset = offset](const Image& entry) { diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 8852c8a1b..822674926 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -56,8 +56,7 @@ Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) { const auto index = static_cast<u32>(index_); const auto offset = static_cast<u32>(offset_); - const auto [entry, is_new] = used_cbufs.try_emplace(index); - entry->second.MarkAsUsed(offset); + used_cbufs.try_emplace(index).first->second.MarkAsUsed(offset); return MakeNode<CbufNode>(index, Immediate(offset)); } @@ -66,8 +65,7 @@ Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) { const auto index = static_cast<u32>(index_); const auto offset = static_cast<u32>(offset_); - const auto [entry, is_new] = used_cbufs.try_emplace(index); - entry->second.MarkAsUsedIndirect(); + used_cbufs.try_emplace(index).first->second.MarkAsUsedIndirect(); Node final_offset = [&] { // Attempt to inline constant buffer without a variable offset. This is done to allow @@ -166,6 +164,7 @@ Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signe std::move(value), Immediate(16)); value = SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE, std::move(value), Immediate(16)); + return value; case Register::Size::Word: // Default - do nothing return value; diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp index 10739b37d..513e9bf49 100644 --- a/src/video_core/shader/track.cpp +++ b/src/video_core/shader/track.cpp @@ -27,8 +27,9 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { const auto& conditional_code = conditional->GetCode(); - auto [found, internal_cursor] = FindOperation( + auto result = FindOperation( conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code); + auto& found = result.first; if (found) { return {std::move(found), cursor}; } @@ -75,12 +76,13 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons s64 cursor) { if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { // Constant buffer found, test if it's an immediate - const auto offset = cbuf->GetOffset(); + const auto& offset = cbuf->GetOffset(); if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); return {tracked, track}; - } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { + } + if (const auto operation = std::get_if<OperationNode>(&*offset)) { const u32 bound_buffer = registry.GetBoundBuffer(); if (bound_buffer != cbuf->GetIndex()) { return {}; @@ -93,12 +95,12 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); const auto& gpu_driver = registry.AccessGuestDriverProfile(); const u32 bindless_cv = NewCustomVariable(); - const Node op = + Node op = Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize())); const Node cv_node = GetCustomVariable(bindless_cv); Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op)); - const std::size_t amend_index = DeclareAmend(amend_op); + const std::size_t amend_index = DeclareAmend(std::move(amend_op)); AmendNodeCv(amend_index, code[cursor]); // TODO Implement Bindless Index custom variable auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(), @@ -141,7 +143,7 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co s64 cursor) const { if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { // Constant buffer found, test if it's an immediate - const auto offset = cbuf->GetOffset(); + const auto& offset = cbuf->GetOffset(); if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { return {tracked, cbuf->GetIndex(), immediate->GetValue()}; } @@ -186,8 +188,8 @@ std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& co std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const { // Reduce the cursor in one to avoid infinite loops when the instruction sets the same register // that it uses as operand - const auto [found, found_cursor] = - TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); + const auto result = TrackRegister(&std::get<GprNode>(*tracked), code, cursor - 1); + const auto& found = result.first; if (!found) { return {}; } diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index e151c26c4..25d2ee2e8 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -196,9 +196,9 @@ std::size_t FormatLookupTable::CalculateIndex(TextureFormat format, bool is_srgb ComponentType alpha_component) noexcept { const auto format_index = static_cast<std::size_t>(format); const auto red_index = static_cast<std::size_t>(red_component); - const auto green_index = static_cast<std::size_t>(red_component); - const auto blue_index = static_cast<std::size_t>(red_component); - const auto alpha_index = static_cast<std::size_t>(red_component); + const auto green_index = static_cast<std::size_t>(green_component); + const auto blue_index = static_cast<std::size_t>(blue_component); + const auto alpha_index = static_cast<std::size_t>(alpha_component); const std::size_t srgb_index = is_srgb ? 1 : 0; return format_index * PerFormat + diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp index 6f3ef45be..0de499946 100644 --- a/src/video_core/texture_cache/surface_params.cpp +++ b/src/video_core/texture_cache/surface_params.cpp @@ -167,7 +167,6 @@ SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_tabl SurfaceParams SurfaceParams::CreateForDepthBuffer(Core::System& system) { const auto& regs = system.GPU().Maxwell3D().regs; - regs.zeta_width, regs.zeta_height, regs.zeta.format, regs.zeta.memory_layout.type; SurfaceParams params; params.is_tiled = regs.zeta.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 3e8663adf..69ca08fd1 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -647,7 +647,8 @@ private: break; } const u32 offset = static_cast<u32>(surface->GetCpuAddr() - cpu_addr); - const auto [x, y, z] = params.GetBlockOffsetXYZ(offset); + const auto offsets = params.GetBlockOffsetXYZ(offset); + const auto z = std::get<2>(offsets); modified |= surface->IsModified(); const CopyParams copy_params(0, 0, 0, 0, 0, z, 0, 0, params.width, params.height, 1); |