diff options
Diffstat (limited to 'src/video_core')
21 files changed, 321 insertions, 174 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index e7cb87589..d374b73cf 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -661,6 +661,10 @@ union Instruction { constexpr Instruction(u64 value) : value{value} {} constexpr Instruction(const Instruction& instr) : value(instr.value) {} + constexpr bool Bit(u64 offset) const { + return ((value >> offset) & 1) != 0; + } + BitField<0, 8, Register> gpr0; BitField<8, 8, Register> gpr8; union { @@ -1874,7 +1878,9 @@ public: HSETP2_C, HSETP2_R, HSETP2_IMM, + HSET2_C, HSET2_R, + HSET2_IMM, POPC_C, POPC_R, POPC_IMM, @@ -2194,7 +2200,9 @@ private: INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"), INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"), INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"), + INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"), INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"), + INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"), INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"), INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"), INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"), diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 8eb017f65..482e49711 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <chrono> + #include "common/assert.h" #include "common/microprofile.h" #include "core/core.h" @@ -154,8 +156,7 @@ u64 GPU::GetTicks() const { constexpr u64 gpu_ticks_num = 384; constexpr u64 gpu_ticks_den = 625; - const u64 cpu_ticks = system.CoreTiming().GetTicks(); - u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count(); + u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count(); if (Settings::values.use_fast_gpu_time) { nanoseconds /= 256; } diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index a1b4c305c..2c42483bd 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -284,6 +284,12 @@ public: /// core timing events. virtual void Start() = 0; + /// Obtain the CPU Context + virtual void ObtainContext() = 0; + + /// Release the CPU Context + virtual void ReleaseContext() = 0; + /// Push GPU command entries to be processed virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 53305ab43..7b855f63e 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -19,10 +19,17 @@ GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBa GPUAsynch::~GPUAsynch() = default; void GPUAsynch::Start() { - cpu_context->MakeCurrent(); gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher); } +void GPUAsynch::ObtainContext() { + cpu_context->MakeCurrent(); +} + +void GPUAsynch::ReleaseContext() { + cpu_context->DoneCurrent(); +} + void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { gpu_thread.SubmitList(std::move(entries)); } diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 517658612..15e9f1d38 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -25,6 +25,8 @@ public: ~GPUAsynch() override; void Start() override; + void ObtainContext() override; + void ReleaseContext() override; void PushGPUEntries(Tegra::CommandList&& entries) override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; void FlushRegion(VAddr addr, u64 size) override; diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp index 6f38a672a..aaeb9811d 100644 --- a/src/video_core/gpu_synch.cpp +++ b/src/video_core/gpu_synch.cpp @@ -13,10 +13,16 @@ GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase GPUSynch::~GPUSynch() = default; -void GPUSynch::Start() { +void GPUSynch::Start() {} + +void GPUSynch::ObtainContext() { context->MakeCurrent(); } +void GPUSynch::ReleaseContext() { + context->DoneCurrent(); +} + void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { dma_pusher->Push(std::move(entries)); dma_pusher->DispatchCalls(); diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 4a6e9a01d..762c20aa5 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -24,6 +24,8 @@ public: ~GPUSynch() override; void Start() override; + void ObtainContext() override; + void ReleaseContext() override; void PushGPUEntries(Tegra::CommandList&& entries) override; void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; void FlushRegion(VAddr addr, u64 size) override; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index c3bb4fe06..738c6f0c1 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" #include "common/microprofile.h" +#include "common/thread.h" #include "core/core.h" #include "core/frontend/emu_window.h" #include "core/settings.h" @@ -18,7 +19,11 @@ namespace VideoCommon::GPUThread { static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher, SynchState& state) { - MicroProfileOnThreadCreate("GpuThread"); + std::string name = "yuzu:GPU"; + MicroProfileOnThreadCreate(name.c_str()); + Common::SetCurrentThreadName(name.c_str()); + Common::SetCurrentThreadPriority(Common::ThreadPriority::High); + system.RegisterHostThread(); // Wait for first GPU command before acquiring the window context while (state.queue.Empty()) diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp index ef7dad349..a50e7b4e0 100644 --- a/src/video_core/macro/macro.cpp +++ b/src/video_core/macro/macro.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <optional> #include <boost/container_hash/hash.hpp> #include "common/assert.h" #include "common/logging/log.h" @@ -35,22 +36,40 @@ void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method, } } else { // Macro not compiled, check if it's uploaded and if so, compile it - auto macro_code = uploaded_macro_code.find(method); + std::optional<u32> mid_method = std::nullopt; + const auto macro_code = uploaded_macro_code.find(method); if (macro_code == uploaded_macro_code.end()) { - UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method); - return; + for (const auto& [method_base, code] : uploaded_macro_code) { + if (method >= method_base && (method - method_base) < code.size()) { + mid_method = method_base; + break; + } + } + if (!mid_method.has_value()) { + UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method); + return; + } } auto& cache_info = macro_cache[method]; - cache_info.hash = boost::hash_value(macro_code->second); - cache_info.lle_program = Compile(macro_code->second); + + if (!mid_method.has_value()) { + cache_info.lle_program = Compile(macro_code->second); + cache_info.hash = boost::hash_value(macro_code->second); + } else { + const auto& macro_cached = uploaded_macro_code[mid_method.value()]; + const auto rebased_method = method - mid_method.value(); + auto& code = uploaded_macro_code[method]; + code.resize(macro_cached.size() - rebased_method); + std::memcpy(code.data(), macro_cached.data() + rebased_method, + code.size() * sizeof(u32)); + cache_info.hash = boost::hash_value(code); + cache_info.lle_program = Compile(code); + } auto hle_program = hle_macros->GetHLEProgram(cache_info.hash); if (hle_program.has_value()) { cache_info.has_hle_program = true; cache_info.hle_program = std::move(hle_program.value()); - } - - if (cache_info.has_hle_program) { cache_info.hle_program->Execute(parameters, method); } else { cache_info.lle_program->Execute(parameters, method); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index b6b6659c1..208fc6167 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -188,20 +188,6 @@ bool IsASTCSupported() { return true; } -/// @brief Returns true when a GL_RENDERER is a Turing GPU -/// @param renderer GL_RENDERER string -bool IsTuring(std::string_view renderer) { - static constexpr std::array<std::string_view, 12> TURING_GPUS = { - "GTX 1650", "GTX 1660", "RTX 2060", "RTX 2070", - "RTX 2080", "TITAN RTX", "Quadro RTX 3000", "Quadro RTX 4000", - "Quadro RTX 5000", "Quadro RTX 6000", "Quadro RTX 8000", "Tesla T4", - }; - return std::any_of(TURING_GPUS.begin(), TURING_GPUS.end(), - [renderer](std::string_view candidate) { - return renderer.find(candidate) != std::string_view::npos; - }); -} - } // Anonymous namespace Device::Device() @@ -213,7 +199,6 @@ Device::Device() const bool is_nvidia = vendor == "NVIDIA Corporation"; const bool is_amd = vendor == "ATI Technologies Inc."; - const bool is_turing = is_nvidia && IsTuring(renderer); bool disable_fast_buffer_sub_data = false; if (is_nvidia && version == "4.6.0 NVIDIA 443.24") { @@ -238,15 +223,12 @@ Device::Device() has_component_indexing_bug = is_amd; has_precise_bug = TestPreciseBug(); has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2; + has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory; // At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive // uniform buffers as "push constants" has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data; - // Nvidia's driver on Turing GPUs randomly crashes when the buffer is made resident, or on - // DeleteBuffers. Disable unified memory on these devices. - has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory && !is_turing; - use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 && GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback && GLAD_GL_NV_transform_feedback2; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 362457ffe..e960a0ef1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -213,9 +213,10 @@ void RasterizerOpenGL::SetupVertexFormat() { if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt || attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) { glVertexAttribIFormat(gl_index, attrib.ComponentCount(), - MaxwellToGL::VertexType(attrib), attrib.offset); + MaxwellToGL::VertexFormat(attrib), attrib.offset); } else { - glVertexAttribFormat(gl_index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), + glVertexAttribFormat(gl_index, attrib.ComponentCount(), + MaxwellToGL::VertexFormat(attrib), attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); } glVertexAttribBinding(gl_index, attrib.buffer); diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 35e329240..774e70a5b 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -24,10 +24,11 @@ namespace MaxwellToGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; -inline GLenum VertexType(Maxwell::VertexAttribute attrib) { +inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { switch (attrib.type) { - case Maxwell::VertexAttribute::Type::UnsignedInt: case Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::UnsignedInt: switch (attrib.size) { case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: @@ -48,8 +49,9 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return GL_UNSIGNED_INT_2_10_10_10_REV; } break; - case Maxwell::VertexAttribute::Type::SignedInt: case Maxwell::VertexAttribute::Type::SignedNorm: + case Maxwell::VertexAttribute::Type::SignedScaled: + case Maxwell::VertexAttribute::Type::SignedInt: switch (attrib.size) { case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: @@ -84,36 +86,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { return GL_FLOAT; } break; - case Maxwell::VertexAttribute::Type::UnsignedScaled: - switch (attrib.size) { - case Maxwell::VertexAttribute::Size::Size_8: - case Maxwell::VertexAttribute::Size::Size_8_8: - case Maxwell::VertexAttribute::Size::Size_8_8_8: - case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return GL_UNSIGNED_BYTE; - case Maxwell::VertexAttribute::Size::Size_16: - case Maxwell::VertexAttribute::Size::Size_16_16: - case Maxwell::VertexAttribute::Size::Size_16_16_16: - case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return GL_UNSIGNED_SHORT; - } - break; - case Maxwell::VertexAttribute::Type::SignedScaled: - switch (attrib.size) { - case Maxwell::VertexAttribute::Size::Size_8: - case Maxwell::VertexAttribute::Size::Size_8_8: - case Maxwell::VertexAttribute::Size::Size_8_8_8: - case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return GL_BYTE; - case Maxwell::VertexAttribute::Size::Size_16: - case Maxwell::VertexAttribute::Size::Size_16_16: - case Maxwell::VertexAttribute::Size::Size_16_16_16: - case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return GL_SHORT; - } - break; } - UNIMPLEMENTED_MSG("Unimplemented vertex type={} and size={}", attrib.TypeString(), + UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", attrib.TypeString(), attrib.SizeString()); return {}; } diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 1f2b6734b..d7f1ae89f 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -294,6 +294,28 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device, VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) { switch (type) { + case Maxwell::VertexAttribute::Type::UnsignedNorm: + switch (size) { + case Maxwell::VertexAttribute::Size::Size_8: + return VK_FORMAT_R8_UNORM; + case Maxwell::VertexAttribute::Size::Size_8_8: + return VK_FORMAT_R8G8_UNORM; + case Maxwell::VertexAttribute::Size::Size_8_8_8: + return VK_FORMAT_R8G8B8_UNORM; + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: + return VK_FORMAT_R8G8B8A8_UNORM; + case Maxwell::VertexAttribute::Size::Size_16: + return VK_FORMAT_R16_UNORM; + case Maxwell::VertexAttribute::Size::Size_16_16: + return VK_FORMAT_R16G16_UNORM; + case Maxwell::VertexAttribute::Size::Size_16_16_16: + return VK_FORMAT_R16G16B16_UNORM; + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: + return VK_FORMAT_R16G16B16A16_UNORM; + case Maxwell::VertexAttribute::Size::Size_10_10_10_2: + return VK_FORMAT_A2B10G10R10_UNORM_PACK32; + } + break; case Maxwell::VertexAttribute::Type::SignedNorm: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: @@ -314,62 +336,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R16G16B16A16_SNORM; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: return VK_FORMAT_A2B10G10R10_SNORM_PACK32; - default: - break; } break; - case Maxwell::VertexAttribute::Type::UnsignedNorm: + case Maxwell::VertexAttribute::Type::UnsignedScaled: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return VK_FORMAT_R8_UNORM; + return VK_FORMAT_R8_USCALED; case Maxwell::VertexAttribute::Size::Size_8_8: - return VK_FORMAT_R8G8_UNORM; + return VK_FORMAT_R8G8_USCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return VK_FORMAT_R8G8B8_UNORM; + return VK_FORMAT_R8G8B8_USCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return VK_FORMAT_R8G8B8A8_UNORM; + return VK_FORMAT_R8G8B8A8_USCALED; case Maxwell::VertexAttribute::Size::Size_16: - return VK_FORMAT_R16_UNORM; + return VK_FORMAT_R16_USCALED; case Maxwell::VertexAttribute::Size::Size_16_16: - return VK_FORMAT_R16G16_UNORM; + return VK_FORMAT_R16G16_USCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return VK_FORMAT_R16G16B16_UNORM; + return VK_FORMAT_R16G16B16_USCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return VK_FORMAT_R16G16B16A16_UNORM; + return VK_FORMAT_R16G16B16A16_USCALED; case Maxwell::VertexAttribute::Size::Size_10_10_10_2: - return VK_FORMAT_A2B10G10R10_UNORM_PACK32; - default: - break; + return VK_FORMAT_A2B10G10R10_USCALED_PACK32; } break; - case Maxwell::VertexAttribute::Type::SignedInt: + case Maxwell::VertexAttribute::Type::SignedScaled: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return VK_FORMAT_R8_SINT; + return VK_FORMAT_R8_SSCALED; case Maxwell::VertexAttribute::Size::Size_8_8: - return VK_FORMAT_R8G8_SINT; + return VK_FORMAT_R8G8_SSCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return VK_FORMAT_R8G8B8_SINT; + return VK_FORMAT_R8G8B8_SSCALED; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return VK_FORMAT_R8G8B8A8_SINT; + return VK_FORMAT_R8G8B8A8_SSCALED; case Maxwell::VertexAttribute::Size::Size_16: - return VK_FORMAT_R16_SINT; + return VK_FORMAT_R16_SSCALED; case Maxwell::VertexAttribute::Size::Size_16_16: - return VK_FORMAT_R16G16_SINT; + return VK_FORMAT_R16G16_SSCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return VK_FORMAT_R16G16B16_SINT; + return VK_FORMAT_R16G16B16_SSCALED; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return VK_FORMAT_R16G16B16A16_SINT; - case Maxwell::VertexAttribute::Size::Size_32: - return VK_FORMAT_R32_SINT; - case Maxwell::VertexAttribute::Size::Size_32_32: - return VK_FORMAT_R32G32_SINT; - case Maxwell::VertexAttribute::Size::Size_32_32_32: - return VK_FORMAT_R32G32B32_SINT; - case Maxwell::VertexAttribute::Size::Size_32_32_32_32: - return VK_FORMAT_R32G32B32A32_SINT; - default: - break; + return VK_FORMAT_R16G16B16A16_SSCALED; + case Maxwell::VertexAttribute::Size::Size_10_10_10_2: + return VK_FORMAT_A2B10G10R10_SSCALED_PACK32; } break; case Maxwell::VertexAttribute::Type::UnsignedInt: @@ -398,56 +408,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R32G32B32_UINT; case Maxwell::VertexAttribute::Size::Size_32_32_32_32: return VK_FORMAT_R32G32B32A32_UINT; - default: - break; + case Maxwell::VertexAttribute::Size::Size_10_10_10_2: + return VK_FORMAT_A2B10G10R10_UINT_PACK32; } break; - case Maxwell::VertexAttribute::Type::UnsignedScaled: + case Maxwell::VertexAttribute::Type::SignedInt: switch (size) { case Maxwell::VertexAttribute::Size::Size_8: - return VK_FORMAT_R8_USCALED; + return VK_FORMAT_R8_SINT; case Maxwell::VertexAttribute::Size::Size_8_8: - return VK_FORMAT_R8G8_USCALED; + return VK_FORMAT_R8G8_SINT; case Maxwell::VertexAttribute::Size::Size_8_8_8: - return VK_FORMAT_R8G8B8_USCALED; + return VK_FORMAT_R8G8B8_SINT; case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return VK_FORMAT_R8G8B8A8_USCALED; + return VK_FORMAT_R8G8B8A8_SINT; case Maxwell::VertexAttribute::Size::Size_16: - return VK_FORMAT_R16_USCALED; + return VK_FORMAT_R16_SINT; case Maxwell::VertexAttribute::Size::Size_16_16: - return VK_FORMAT_R16G16_USCALED; + return VK_FORMAT_R16G16_SINT; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return VK_FORMAT_R16G16B16_USCALED; + return VK_FORMAT_R16G16B16_SINT; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return VK_FORMAT_R16G16B16A16_USCALED; - default: - break; + return VK_FORMAT_R16G16B16A16_SINT; + case Maxwell::VertexAttribute::Size::Size_32: + return VK_FORMAT_R32_SINT; + case Maxwell::VertexAttribute::Size::Size_32_32: + return VK_FORMAT_R32G32_SINT; + case Maxwell::VertexAttribute::Size::Size_32_32_32: + return VK_FORMAT_R32G32B32_SINT; + case Maxwell::VertexAttribute::Size::Size_32_32_32_32: + return VK_FORMAT_R32G32B32A32_SINT; + case Maxwell::VertexAttribute::Size::Size_10_10_10_2: + return VK_FORMAT_A2B10G10R10_SINT_PACK32; } break; - case Maxwell::VertexAttribute::Type::SignedScaled: + case Maxwell::VertexAttribute::Type::Float: switch (size) { - case Maxwell::VertexAttribute::Size::Size_8: - return VK_FORMAT_R8_SSCALED; - case Maxwell::VertexAttribute::Size::Size_8_8: - return VK_FORMAT_R8G8_SSCALED; - case Maxwell::VertexAttribute::Size::Size_8_8_8: - return VK_FORMAT_R8G8B8_SSCALED; - case Maxwell::VertexAttribute::Size::Size_8_8_8_8: - return VK_FORMAT_R8G8B8A8_SSCALED; case Maxwell::VertexAttribute::Size::Size_16: - return VK_FORMAT_R16_SSCALED; + return VK_FORMAT_R16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_16_16: - return VK_FORMAT_R16G16_SSCALED; + return VK_FORMAT_R16G16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_16_16_16: - return VK_FORMAT_R16G16B16_SSCALED; + return VK_FORMAT_R16G16B16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return VK_FORMAT_R16G16B16A16_SSCALED; - default: - break; - } - break; - case Maxwell::VertexAttribute::Type::Float: - switch (size) { + return VK_FORMAT_R16G16B16A16_SFLOAT; case Maxwell::VertexAttribute::Size::Size_32: return VK_FORMAT_R32_SFLOAT; case Maxwell::VertexAttribute::Size::Size_32_32: @@ -456,16 +460,6 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib return VK_FORMAT_R32G32B32_SFLOAT; case Maxwell::VertexAttribute::Size::Size_32_32_32_32: return VK_FORMAT_R32G32B32A32_SFLOAT; - case Maxwell::VertexAttribute::Size::Size_16: - return VK_FORMAT_R16_SFLOAT; - case Maxwell::VertexAttribute::Size::Size_16_16: - return VK_FORMAT_R16G16_SFLOAT; - case Maxwell::VertexAttribute::Size::Size_16_16_16: - return VK_FORMAT_R16G16B16_SFLOAT; - case Maxwell::VertexAttribute::Size::Size_16_16_16_16: - return VK_FORMAT_R16G16B16A16_SFLOAT; - default: - break; } break; } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index cd9673d1f..2d9b18ed9 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -155,11 +155,31 @@ vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatc } } - static constexpr std::array layers_data{"VK_LAYER_LUNARG_standard_validation"}; - vk::Span<const char*> layers = layers_data; - if (!enable_layers) { - layers = {}; + std::vector<const char*> layers; + layers.reserve(1); + if (enable_layers) { + layers.push_back("VK_LAYER_KHRONOS_validation"); + } + + const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld); + if (!layer_properties) { + LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers"); + layers.clear(); + } + + for (auto layer_it = layers.begin(); layer_it != layers.end();) { + const char* const layer = *layer_it; + const auto it = std::find_if( + layer_properties->begin(), layer_properties->end(), + [layer](const VkLayerProperties& prop) { return !std::strcmp(layer, prop.layerName); }); + if (it == layer_properties->end()) { + LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer); + layer_it = layers.erase(layer_it); + } else { + ++layer_it; + } } + vk::Instance instance = vk::Instance::Create(layers, extensions, dld); if (!instance) { LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance"); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e3714ee6d..a8d94eac3 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -143,6 +143,49 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry } } +/// @brief Determine if an attachment to be updated has to preserve contents +/// @param is_clear True when a clear is being executed +/// @param regs 3D registers +/// @return True when the contents have to be preserved +bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) { + if (!is_clear) { + return true; + } + // First we have to make sure all clear masks are enabled. + if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B || + !regs.clear_buffers.A) { + return true; + } + // If scissors are disabled, the whole screen is cleared + if (!regs.clear_flags.scissor) { + return false; + } + // Then we have to confirm scissor testing clears the whole image + const std::size_t index = regs.clear_buffers.RT; + const auto& scissor = regs.scissor_test[0]; + return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width || + scissor.max_y < regs.rt[index].height; +} + +/// @brief Determine if an attachment to be updated has to preserve contents +/// @param is_clear True when a clear is being executed +/// @param regs 3D registers +/// @return True when the contents have to be preserved +bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) { + // If we are not clearing, the contents have to be preserved + if (!is_clear) { + return true; + } + // For depth stencil clears we only have to confirm scissor test covers the whole image + if (!regs.clear_flags.scissor) { + return false; + } + // Make sure the clear cover the whole image + const auto& scissor = regs.scissor_test[0]; + return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width || + scissor.max_y < regs.zeta_height; +} + } // Anonymous namespace class BufferBindings final { @@ -344,7 +387,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { buffer_cache.Unmap(); - const Texceptions texceptions = UpdateAttachments(); + const Texceptions texceptions = UpdateAttachments(false); SetupImageTransitions(texceptions, color_attachments, zeta_attachment); key.renderpass_params = GetRenderPassParams(texceptions); @@ -400,7 +443,7 @@ void RasterizerVulkan::Clear() { return; } - [[maybe_unused]] const auto texceptions = UpdateAttachments(); + [[maybe_unused]] const auto texceptions = UpdateAttachments(true); DEBUG_ASSERT(texceptions.none()); SetupImageTransitions(0, color_attachments, zeta_attachment); @@ -677,9 +720,12 @@ void RasterizerVulkan::FlushWork() { draw_counter = 0; } -RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { +RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) { MICROPROFILE_SCOPE(Vulkan_RenderTargets); - auto& dirty = system.GPU().Maxwell3D().dirty.flags; + auto& maxwell3d = system.GPU().Maxwell3D(); + auto& dirty = maxwell3d.dirty.flags; + auto& regs = maxwell3d.regs; + const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets]; dirty[VideoCommon::Dirty::RenderTargets] = false; @@ -688,7 +734,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { Texceptions texceptions; for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { if (update_rendertargets) { - color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true); + const bool preserve_contents = HasToPreserveColorContents(is_clear, regs); + color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents); } if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { texceptions[rt] = true; @@ -696,7 +743,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { } if (update_rendertargets) { - zeta_attachment = texture_cache.GetDepthBufferSurface(true); + const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs); + zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents); } if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { texceptions[ZETA_TEXCEPTION_INDEX] = true; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index c8c187606..83e00e7e9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -159,7 +159,10 @@ private: void FlushWork(); - Texceptions UpdateAttachments(); + /// @brief Updates the currently bound attachments + /// @param is_clear True when the framebuffer is updated as a clear + /// @return Bitfield of attachments being used as sampled textures + Texceptions UpdateAttachments(bool is_clear); std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 82ec9180e..56524e6f3 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -9,6 +9,7 @@ #include <utility> #include "common/microprofile.h" +#include "common/thread.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" @@ -133,6 +134,7 @@ void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) { } void VKScheduler::WorkerThread() { + Common::SetCurrentThreadPriority(Common::ThreadPriority::High); std::unique_lock lock{mutex}; do { cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp index 42eff85d3..0d485a662 100644 --- a/src/video_core/renderer_vulkan/wrapper.cpp +++ b/src/video_core/renderer_vulkan/wrapper.cpp @@ -153,7 +153,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { bool Load(InstanceDispatch& dld) noexcept { #define X(name) Proc(dld.name, dld, #name) - return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties); + return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties) && + X(vkEnumerateInstanceLayerProperties); #undef X } @@ -770,4 +771,17 @@ std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProp return properties; } +std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties( + const InstanceDispatch& dld) { + u32 num; + if (dld.vkEnumerateInstanceLayerProperties(&num, nullptr) != VK_SUCCESS) { + return std::nullopt; + } + std::vector<VkLayerProperties> properties(num); + if (dld.vkEnumerateInstanceLayerProperties(&num, properties.data()) != VK_SUCCESS) { + return std::nullopt; + } + return properties; +} + } // namespace Vulkan::vk diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h index da42ca88e..d56fdb3f9 100644 --- a/src/video_core/renderer_vulkan/wrapper.h +++ b/src/video_core/renderer_vulkan/wrapper.h @@ -141,6 +141,7 @@ struct InstanceDispatch { PFN_vkCreateInstance vkCreateInstance; PFN_vkDestroyInstance vkDestroyInstance; PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties; + PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties; PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT; PFN_vkCreateDevice vkCreateDevice; @@ -996,4 +997,7 @@ private: std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties( const InstanceDispatch& dld); +std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties( + const InstanceDispatch& dld); + } // namespace Vulkan::vk diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp index 848e46874..b2e88fa20 100644 --- a/src/video_core/shader/decode/half_set.cpp +++ b/src/video_core/shader/decode/half_set.cpp @@ -13,55 +13,101 @@ namespace VideoCommon::Shader { +using std::move; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; +using Tegra::Shader::PredCondition; u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); - if (instr.hset2.ftz == 0) { - LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); + PredCondition cond; + bool bf; + bool ftz; + bool neg_a; + bool abs_a; + bool neg_b; + bool abs_b; + switch (opcode->get().GetId()) { + case OpCode::Id::HSET2_C: + case OpCode::Id::HSET2_IMM: + cond = instr.hsetp2.cbuf_and_imm.cond; + bf = instr.Bit(53); + ftz = instr.Bit(54); + neg_a = instr.Bit(43); + abs_a = instr.Bit(44); + neg_b = instr.Bit(56); + abs_b = instr.Bit(54); + break; + case OpCode::Id::HSET2_R: + cond = instr.hsetp2.reg.cond; + bf = instr.Bit(49); + ftz = instr.Bit(50); + neg_a = instr.Bit(43); + abs_a = instr.Bit(44); + neg_b = instr.Bit(31); + abs_b = instr.Bit(30); + break; + default: + UNREACHABLE(); } - Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); - op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a); - - Node op_b = [&]() { + Node op_b = [this, instr, opcode] { switch (opcode->get().GetId()) { + case OpCode::Id::HSET2_C: + // Inform as unimplemented as this is not tested. + UNIMPLEMENTED_MSG("HSET2_C is not implemented"); + return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()); case OpCode::Id::HSET2_R: return GetRegister(instr.gpr20); + case OpCode::Id::HSET2_IMM: + return UnpackHalfImmediate(instr, true); default: UNREACHABLE(); - return Immediate(0); + return Node{}; } }(); - op_b = UnpackHalfFloat(op_b, instr.hset2.type_b); - op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b); - const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); + if (!ftz) { + LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName()); + } + + Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a); + op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a); + + switch (opcode->get().GetId()) { + case OpCode::Id::HSET2_R: + op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b); + [[fallthrough]]; + case OpCode::Id::HSET2_C: + op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b); + break; + default: + break; + } - const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b); + Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred); + + Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b); const OperationCode combiner = GetPredicateCombiner(instr.hset2.op); // HSET2 operates on each half float in the pack. std::array<Node, 2> values; for (u32 i = 0; i < 2; ++i) { - const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff; - const Node true_value = Immediate(raw_value << (i * 16)); - const Node false_value = Immediate(0); - - const Node comparison = - Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); - const Node predicate = Operation(combiner, comparison, second_pred); + const u32 raw_value = bf ? 0x3c00 : 0xffff; + Node true_value = Immediate(raw_value << (i * 16)); + Node false_value = Immediate(0); + Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i)); + Node predicate = Operation(combiner, comparison, second_pred); values[i] = - Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value); + Operation(OperationCode::Select, predicate, move(true_value), move(false_value)); } - const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]); - SetRegister(bb, instr.gpr0, value); + Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]); + SetRegister(bb, instr.gpr0, move(value)); return pc; } diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 94d3a6ae5..0caf3b4f0 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -120,6 +120,9 @@ std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap( } const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)}; const auto layer{static_cast<u32>(relative_address / layer_size)}; + if (layer >= params.depth) { + return {}; + } const GPUVAddr mipmap_address = relative_address - layer_size * layer; const auto mipmap_it = Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); |