summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/engines/shader_bytecode.h8
-rw-r--r--src/video_core/gpu.cpp5
-rw-r--r--src/video_core/gpu.h6
-rw-r--r--src/video_core/gpu_asynch.cpp9
-rw-r--r--src/video_core/gpu_asynch.h2
-rw-r--r--src/video_core/gpu_synch.cpp8
-rw-r--r--src/video_core/gpu_synch.h2
-rw-r--r--src/video_core/gpu_thread.cpp7
-rw-r--r--src/video_core/macro/macro.cpp35
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp20
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp5
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h38
-rw-r--r--src/video_core/renderer_vulkan/maxwell_to_vk.cpp144
-rw-r--r--src/video_core/renderer_vulkan/renderer_vulkan.cpp28
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.cpp60
-rw-r--r--src/video_core/renderer_vulkan/vk_rasterizer.h5
-rw-r--r--src/video_core/renderer_vulkan/vk_scheduler.cpp2
-rw-r--r--src/video_core/renderer_vulkan/wrapper.cpp16
-rw-r--r--src/video_core/renderer_vulkan/wrapper.h4
-rw-r--r--src/video_core/shader/decode/half_set.cpp88
-rw-r--r--src/video_core/texture_cache/surface_base.cpp3
21 files changed, 321 insertions, 174 deletions
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index e7cb87589..d374b73cf 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -661,6 +661,10 @@ union Instruction {
constexpr Instruction(u64 value) : value{value} {}
constexpr Instruction(const Instruction& instr) : value(instr.value) {}
+ constexpr bool Bit(u64 offset) const {
+ return ((value >> offset) & 1) != 0;
+ }
+
BitField<0, 8, Register> gpr0;
BitField<8, 8, Register> gpr8;
union {
@@ -1874,7 +1878,9 @@ public:
HSETP2_C,
HSETP2_R,
HSETP2_IMM,
+ HSET2_C,
HSET2_R,
+ HSET2_IMM,
POPC_C,
POPC_R,
POPC_IMM,
@@ -2194,7 +2200,9 @@ private:
INST("0111111-1-------", Id::HSETP2_C, Type::HalfSetPredicate, "HSETP2_C"),
INST("0101110100100---", Id::HSETP2_R, Type::HalfSetPredicate, "HSETP2_R"),
INST("0111111-0-------", Id::HSETP2_IMM, Type::HalfSetPredicate, "HSETP2_IMM"),
+ INST("0111110-1-------", Id::HSET2_C, Type::HalfSet, "HSET2_C"),
INST("0101110100011---", Id::HSET2_R, Type::HalfSet, "HSET2_R"),
+ INST("0111110-0-------", Id::HSET2_IMM, Type::HalfSet, "HSET2_IMM"),
INST("010110111010----", Id::FCMP_RR, Type::Arithmetic, "FCMP_RR"),
INST("010010111010----", Id::FCMP_RC, Type::Arithmetic, "FCMP_RC"),
INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 8eb017f65..482e49711 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <chrono>
+
#include "common/assert.h"
#include "common/microprofile.h"
#include "core/core.h"
@@ -154,8 +156,7 @@ u64 GPU::GetTicks() const {
constexpr u64 gpu_ticks_num = 384;
constexpr u64 gpu_ticks_den = 625;
- const u64 cpu_ticks = system.CoreTiming().GetTicks();
- u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
+ u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count();
if (Settings::values.use_fast_gpu_time) {
nanoseconds /= 256;
}
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index a1b4c305c..2c42483bd 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -284,6 +284,12 @@ public:
/// core timing events.
virtual void Start() = 0;
+ /// Obtain the CPU Context
+ virtual void ObtainContext() = 0;
+
+ /// Release the CPU Context
+ virtual void ReleaseContext() = 0;
+
/// Push GPU command entries to be processed
virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index 53305ab43..7b855f63e 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -19,10 +19,17 @@ GPUAsynch::GPUAsynch(Core::System& system, std::unique_ptr<VideoCore::RendererBa
GPUAsynch::~GPUAsynch() = default;
void GPUAsynch::Start() {
- cpu_context->MakeCurrent();
gpu_thread.StartThread(*renderer, *gpu_context, *dma_pusher);
}
+void GPUAsynch::ObtainContext() {
+ cpu_context->MakeCurrent();
+}
+
+void GPUAsynch::ReleaseContext() {
+ cpu_context->DoneCurrent();
+}
+
void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) {
gpu_thread.SubmitList(std::move(entries));
}
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index 517658612..15e9f1d38 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -25,6 +25,8 @@ public:
~GPUAsynch() override;
void Start() override;
+ void ObtainContext() override;
+ void ReleaseContext() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(VAddr addr, u64 size) override;
diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp
index 6f38a672a..aaeb9811d 100644
--- a/src/video_core/gpu_synch.cpp
+++ b/src/video_core/gpu_synch.cpp
@@ -13,10 +13,16 @@ GPUSynch::GPUSynch(Core::System& system, std::unique_ptr<VideoCore::RendererBase
GPUSynch::~GPUSynch() = default;
-void GPUSynch::Start() {
+void GPUSynch::Start() {}
+
+void GPUSynch::ObtainContext() {
context->MakeCurrent();
}
+void GPUSynch::ReleaseContext() {
+ context->DoneCurrent();
+}
+
void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) {
dma_pusher->Push(std::move(entries));
dma_pusher->DispatchCalls();
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 4a6e9a01d..762c20aa5 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -24,6 +24,8 @@ public:
~GPUSynch() override;
void Start() override;
+ void ObtainContext() override;
+ void ReleaseContext() override;
void PushGPUEntries(Tegra::CommandList&& entries) override;
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
void FlushRegion(VAddr addr, u64 size) override;
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index c3bb4fe06..738c6f0c1 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -4,6 +4,7 @@
#include "common/assert.h"
#include "common/microprofile.h"
+#include "common/thread.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "core/settings.h"
@@ -18,7 +19,11 @@ namespace VideoCommon::GPUThread {
static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
SynchState& state) {
- MicroProfileOnThreadCreate("GpuThread");
+ std::string name = "yuzu:GPU";
+ MicroProfileOnThreadCreate(name.c_str());
+ Common::SetCurrentThreadName(name.c_str());
+ Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
+ system.RegisterHostThread();
// Wait for first GPU command before acquiring the window context
while (state.queue.Empty())
diff --git a/src/video_core/macro/macro.cpp b/src/video_core/macro/macro.cpp
index ef7dad349..a50e7b4e0 100644
--- a/src/video_core/macro/macro.cpp
+++ b/src/video_core/macro/macro.cpp
@@ -2,6 +2,7 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <optional>
#include <boost/container_hash/hash.hpp>
#include "common/assert.h"
#include "common/logging/log.h"
@@ -35,22 +36,40 @@ void MacroEngine::Execute(Engines::Maxwell3D& maxwell3d, u32 method,
}
} else {
// Macro not compiled, check if it's uploaded and if so, compile it
- auto macro_code = uploaded_macro_code.find(method);
+ std::optional<u32> mid_method = std::nullopt;
+ const auto macro_code = uploaded_macro_code.find(method);
if (macro_code == uploaded_macro_code.end()) {
- UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method);
- return;
+ for (const auto& [method_base, code] : uploaded_macro_code) {
+ if (method >= method_base && (method - method_base) < code.size()) {
+ mid_method = method_base;
+ break;
+ }
+ }
+ if (!mid_method.has_value()) {
+ UNREACHABLE_MSG("Macro 0x{0:x} was not uploaded", method);
+ return;
+ }
}
auto& cache_info = macro_cache[method];
- cache_info.hash = boost::hash_value(macro_code->second);
- cache_info.lle_program = Compile(macro_code->second);
+
+ if (!mid_method.has_value()) {
+ cache_info.lle_program = Compile(macro_code->second);
+ cache_info.hash = boost::hash_value(macro_code->second);
+ } else {
+ const auto& macro_cached = uploaded_macro_code[mid_method.value()];
+ const auto rebased_method = method - mid_method.value();
+ auto& code = uploaded_macro_code[method];
+ code.resize(macro_cached.size() - rebased_method);
+ std::memcpy(code.data(), macro_cached.data() + rebased_method,
+ code.size() * sizeof(u32));
+ cache_info.hash = boost::hash_value(code);
+ cache_info.lle_program = Compile(code);
+ }
auto hle_program = hle_macros->GetHLEProgram(cache_info.hash);
if (hle_program.has_value()) {
cache_info.has_hle_program = true;
cache_info.hle_program = std::move(hle_program.value());
- }
-
- if (cache_info.has_hle_program) {
cache_info.hle_program->Execute(parameters, method);
} else {
cache_info.lle_program->Execute(parameters, method);
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index b6b6659c1..208fc6167 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -188,20 +188,6 @@ bool IsASTCSupported() {
return true;
}
-/// @brief Returns true when a GL_RENDERER is a Turing GPU
-/// @param renderer GL_RENDERER string
-bool IsTuring(std::string_view renderer) {
- static constexpr std::array<std::string_view, 12> TURING_GPUS = {
- "GTX 1650", "GTX 1660", "RTX 2060", "RTX 2070",
- "RTX 2080", "TITAN RTX", "Quadro RTX 3000", "Quadro RTX 4000",
- "Quadro RTX 5000", "Quadro RTX 6000", "Quadro RTX 8000", "Tesla T4",
- };
- return std::any_of(TURING_GPUS.begin(), TURING_GPUS.end(),
- [renderer](std::string_view candidate) {
- return renderer.find(candidate) != std::string_view::npos;
- });
-}
-
} // Anonymous namespace
Device::Device()
@@ -213,7 +199,6 @@ Device::Device()
const bool is_nvidia = vendor == "NVIDIA Corporation";
const bool is_amd = vendor == "ATI Technologies Inc.";
- const bool is_turing = is_nvidia && IsTuring(renderer);
bool disable_fast_buffer_sub_data = false;
if (is_nvidia && version == "4.6.0 NVIDIA 443.24") {
@@ -238,15 +223,12 @@ Device::Device()
has_component_indexing_bug = is_amd;
has_precise_bug = TestPreciseBug();
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
+ has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
// uniform buffers as "push constants"
has_fast_buffer_sub_data = is_nvidia && !disable_fast_buffer_sub_data;
- // Nvidia's driver on Turing GPUs randomly crashes when the buffer is made resident, or on
- // DeleteBuffers. Disable unified memory on these devices.
- has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory && !is_turing;
-
use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
GLAD_GL_NV_compute_program5 && GLAD_GL_NV_transform_feedback &&
GLAD_GL_NV_transform_feedback2;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 362457ffe..e960a0ef1 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -213,9 +213,10 @@ void RasterizerOpenGL::SetupVertexFormat() {
if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt ||
attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) {
glVertexAttribIFormat(gl_index, attrib.ComponentCount(),
- MaxwellToGL::VertexType(attrib), attrib.offset);
+ MaxwellToGL::VertexFormat(attrib), attrib.offset);
} else {
- glVertexAttribFormat(gl_index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
+ glVertexAttribFormat(gl_index, attrib.ComponentCount(),
+ MaxwellToGL::VertexFormat(attrib),
attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
}
glVertexAttribBinding(gl_index, attrib.buffer);
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 35e329240..774e70a5b 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -24,10 +24,11 @@ namespace MaxwellToGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
+inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
switch (attrib.type) {
- case Maxwell::VertexAttribute::Type::UnsignedInt:
case Maxwell::VertexAttribute::Type::UnsignedNorm:
+ case Maxwell::VertexAttribute::Type::UnsignedScaled:
+ case Maxwell::VertexAttribute::Type::UnsignedInt:
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8:
case Maxwell::VertexAttribute::Size::Size_8_8:
@@ -48,8 +49,9 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
return GL_UNSIGNED_INT_2_10_10_10_REV;
}
break;
- case Maxwell::VertexAttribute::Type::SignedInt:
case Maxwell::VertexAttribute::Type::SignedNorm:
+ case Maxwell::VertexAttribute::Type::SignedScaled:
+ case Maxwell::VertexAttribute::Type::SignedInt:
switch (attrib.size) {
case Maxwell::VertexAttribute::Size::Size_8:
case Maxwell::VertexAttribute::Size::Size_8_8:
@@ -84,36 +86,8 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
return GL_FLOAT;
}
break;
- case Maxwell::VertexAttribute::Type::UnsignedScaled:
- switch (attrib.size) {
- case Maxwell::VertexAttribute::Size::Size_8:
- case Maxwell::VertexAttribute::Size::Size_8_8:
- case Maxwell::VertexAttribute::Size::Size_8_8_8:
- case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return GL_UNSIGNED_BYTE;
- case Maxwell::VertexAttribute::Size::Size_16:
- case Maxwell::VertexAttribute::Size::Size_16_16:
- case Maxwell::VertexAttribute::Size::Size_16_16_16:
- case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return GL_UNSIGNED_SHORT;
- }
- break;
- case Maxwell::VertexAttribute::Type::SignedScaled:
- switch (attrib.size) {
- case Maxwell::VertexAttribute::Size::Size_8:
- case Maxwell::VertexAttribute::Size::Size_8_8:
- case Maxwell::VertexAttribute::Size::Size_8_8_8:
- case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return GL_BYTE;
- case Maxwell::VertexAttribute::Size::Size_16:
- case Maxwell::VertexAttribute::Size::Size_16_16:
- case Maxwell::VertexAttribute::Size::Size_16_16_16:
- case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return GL_SHORT;
- }
- break;
}
- UNIMPLEMENTED_MSG("Unimplemented vertex type={} and size={}", attrib.TypeString(),
+ UNIMPLEMENTED_MSG("Unimplemented vertex format of type={} and size={}", attrib.TypeString(),
attrib.SizeString());
return {};
}
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 1f2b6734b..d7f1ae89f 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -294,6 +294,28 @@ VkPrimitiveTopology PrimitiveTopology([[maybe_unused]] const VKDevice& device,
VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttribute::Size size) {
switch (type) {
+ case Maxwell::VertexAttribute::Type::UnsignedNorm:
+ switch (size) {
+ case Maxwell::VertexAttribute::Size::Size_8:
+ return VK_FORMAT_R8_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_8_8:
+ return VK_FORMAT_R8G8_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8:
+ return VK_FORMAT_R8G8B8_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+ return VK_FORMAT_R8G8B8A8_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_16:
+ return VK_FORMAT_R16_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ return VK_FORMAT_R16G16_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
+ return VK_FORMAT_R16G16B16_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+ return VK_FORMAT_R16G16B16A16_UNORM;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
+ }
+ break;
case Maxwell::VertexAttribute::Type::SignedNorm:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
@@ -314,62 +336,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R16G16B16A16_SNORM;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
return VK_FORMAT_A2B10G10R10_SNORM_PACK32;
- default:
- break;
}
break;
- case Maxwell::VertexAttribute::Type::UnsignedNorm:
+ case Maxwell::VertexAttribute::Type::UnsignedScaled:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_UNORM;
+ return VK_FORMAT_R8_USCALED;
case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_UNORM;
+ return VK_FORMAT_R8G8_USCALED;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_UNORM;
+ return VK_FORMAT_R8G8B8_USCALED;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_UNORM;
+ return VK_FORMAT_R8G8B8A8_USCALED;
case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_UNORM;
+ return VK_FORMAT_R16_USCALED;
case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_UNORM;
+ return VK_FORMAT_R16G16_USCALED;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_UNORM;
+ return VK_FORMAT_R16G16B16_USCALED;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_UNORM;
+ return VK_FORMAT_R16G16B16A16_USCALED;
case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
- return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
- default:
- break;
+ return VK_FORMAT_A2B10G10R10_USCALED_PACK32;
}
break;
- case Maxwell::VertexAttribute::Type::SignedInt:
+ case Maxwell::VertexAttribute::Type::SignedScaled:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_SINT;
+ return VK_FORMAT_R8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_SINT;
+ return VK_FORMAT_R8G8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_SINT;
+ return VK_FORMAT_R8G8B8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_SINT;
+ return VK_FORMAT_R8G8B8A8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_SINT;
+ return VK_FORMAT_R16_SSCALED;
case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_SINT;
+ return VK_FORMAT_R16G16_SSCALED;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_SINT;
+ return VK_FORMAT_R16G16B16_SSCALED;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_SINT;
- case Maxwell::VertexAttribute::Size::Size_32:
- return VK_FORMAT_R32_SINT;
- case Maxwell::VertexAttribute::Size::Size_32_32:
- return VK_FORMAT_R32G32_SINT;
- case Maxwell::VertexAttribute::Size::Size_32_32_32:
- return VK_FORMAT_R32G32B32_SINT;
- case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
- return VK_FORMAT_R32G32B32A32_SINT;
- default:
- break;
+ return VK_FORMAT_R16G16B16A16_SSCALED;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_SSCALED_PACK32;
}
break;
case Maxwell::VertexAttribute::Type::UnsignedInt:
@@ -398,56 +408,50 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R32G32B32_UINT;
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return VK_FORMAT_R32G32B32A32_UINT;
- default:
- break;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_UINT_PACK32;
}
break;
- case Maxwell::VertexAttribute::Type::UnsignedScaled:
+ case Maxwell::VertexAttribute::Type::SignedInt:
switch (size) {
case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_USCALED;
+ return VK_FORMAT_R8_SINT;
case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_USCALED;
+ return VK_FORMAT_R8G8_SINT;
case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_USCALED;
+ return VK_FORMAT_R8G8B8_SINT;
case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_USCALED;
+ return VK_FORMAT_R8G8B8A8_SINT;
case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_USCALED;
+ return VK_FORMAT_R16_SINT;
case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_USCALED;
+ return VK_FORMAT_R16G16_SINT;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_USCALED;
+ return VK_FORMAT_R16G16B16_SINT;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_USCALED;
- default:
- break;
+ return VK_FORMAT_R16G16B16A16_SINT;
+ case Maxwell::VertexAttribute::Size::Size_32:
+ return VK_FORMAT_R32_SINT;
+ case Maxwell::VertexAttribute::Size::Size_32_32:
+ return VK_FORMAT_R32G32_SINT;
+ case Maxwell::VertexAttribute::Size::Size_32_32_32:
+ return VK_FORMAT_R32G32B32_SINT;
+ case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
+ return VK_FORMAT_R32G32B32A32_SINT;
+ case Maxwell::VertexAttribute::Size::Size_10_10_10_2:
+ return VK_FORMAT_A2B10G10R10_SINT_PACK32;
}
break;
- case Maxwell::VertexAttribute::Type::SignedScaled:
+ case Maxwell::VertexAttribute::Type::Float:
switch (size) {
- case Maxwell::VertexAttribute::Size::Size_8:
- return VK_FORMAT_R8_SSCALED;
- case Maxwell::VertexAttribute::Size::Size_8_8:
- return VK_FORMAT_R8G8_SSCALED;
- case Maxwell::VertexAttribute::Size::Size_8_8_8:
- return VK_FORMAT_R8G8B8_SSCALED;
- case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
- return VK_FORMAT_R8G8B8A8_SSCALED;
case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_SSCALED;
+ return VK_FORMAT_R16_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_SSCALED;
+ return VK_FORMAT_R16G16_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_SSCALED;
+ return VK_FORMAT_R16G16B16_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_SSCALED;
- default:
- break;
- }
- break;
- case Maxwell::VertexAttribute::Type::Float:
- switch (size) {
+ return VK_FORMAT_R16G16B16A16_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_32:
return VK_FORMAT_R32_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_32_32:
@@ -456,16 +460,6 @@ VkFormat VertexFormat(Maxwell::VertexAttribute::Type type, Maxwell::VertexAttrib
return VK_FORMAT_R32G32B32_SFLOAT;
case Maxwell::VertexAttribute::Size::Size_32_32_32_32:
return VK_FORMAT_R32G32B32A32_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_16:
- return VK_FORMAT_R16_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_16_16:
- return VK_FORMAT_R16G16_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_16_16_16:
- return VK_FORMAT_R16G16B16_SFLOAT;
- case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
- return VK_FORMAT_R16G16B16A16_SFLOAT;
- default:
- break;
}
break;
}
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index cd9673d1f..2d9b18ed9 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -155,11 +155,31 @@ vk::Instance CreateInstance(Common::DynamicLibrary& library, vk::InstanceDispatc
}
}
- static constexpr std::array layers_data{"VK_LAYER_LUNARG_standard_validation"};
- vk::Span<const char*> layers = layers_data;
- if (!enable_layers) {
- layers = {};
+ std::vector<const char*> layers;
+ layers.reserve(1);
+ if (enable_layers) {
+ layers.push_back("VK_LAYER_KHRONOS_validation");
+ }
+
+ const std::optional layer_properties = vk::EnumerateInstanceLayerProperties(dld);
+ if (!layer_properties) {
+ LOG_ERROR(Render_Vulkan, "Failed to query layer properties, disabling layers");
+ layers.clear();
+ }
+
+ for (auto layer_it = layers.begin(); layer_it != layers.end();) {
+ const char* const layer = *layer_it;
+ const auto it = std::find_if(
+ layer_properties->begin(), layer_properties->end(),
+ [layer](const VkLayerProperties& prop) { return !std::strcmp(layer, prop.layerName); });
+ if (it == layer_properties->end()) {
+ LOG_ERROR(Render_Vulkan, "Layer {} not available, removing it", layer);
+ layer_it = layers.erase(layer_it);
+ } else {
+ ++layer_it;
+ }
}
+
vk::Instance instance = vk::Instance::Create(layers, extensions, dld);
if (!instance) {
LOG_ERROR(Render_Vulkan, "Failed to create Vulkan instance");
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index e3714ee6d..a8d94eac3 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -143,6 +143,49 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
}
}
+/// @brief Determine if an attachment to be updated has to preserve contents
+/// @param is_clear True when a clear is being executed
+/// @param regs 3D registers
+/// @return True when the contents have to be preserved
+bool HasToPreserveColorContents(bool is_clear, const Maxwell& regs) {
+ if (!is_clear) {
+ return true;
+ }
+ // First we have to make sure all clear masks are enabled.
+ if (!regs.clear_buffers.R || !regs.clear_buffers.G || !regs.clear_buffers.B ||
+ !regs.clear_buffers.A) {
+ return true;
+ }
+ // If scissors are disabled, the whole screen is cleared
+ if (!regs.clear_flags.scissor) {
+ return false;
+ }
+ // Then we have to confirm scissor testing clears the whole image
+ const std::size_t index = regs.clear_buffers.RT;
+ const auto& scissor = regs.scissor_test[0];
+ return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.rt[index].width ||
+ scissor.max_y < regs.rt[index].height;
+}
+
+/// @brief Determine if an attachment to be updated has to preserve contents
+/// @param is_clear True when a clear is being executed
+/// @param regs 3D registers
+/// @return True when the contents have to be preserved
+bool HasToPreserveDepthContents(bool is_clear, const Maxwell& regs) {
+ // If we are not clearing, the contents have to be preserved
+ if (!is_clear) {
+ return true;
+ }
+ // For depth stencil clears we only have to confirm scissor test covers the whole image
+ if (!regs.clear_flags.scissor) {
+ return false;
+ }
+ // Make sure the clear cover the whole image
+ const auto& scissor = regs.scissor_test[0];
+ return scissor.min_x > 0 || scissor.min_y > 0 || scissor.max_x < regs.zeta_width ||
+ scissor.max_y < regs.zeta_height;
+}
+
} // Anonymous namespace
class BufferBindings final {
@@ -344,7 +387,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
buffer_cache.Unmap();
- const Texceptions texceptions = UpdateAttachments();
+ const Texceptions texceptions = UpdateAttachments(false);
SetupImageTransitions(texceptions, color_attachments, zeta_attachment);
key.renderpass_params = GetRenderPassParams(texceptions);
@@ -400,7 +443,7 @@ void RasterizerVulkan::Clear() {
return;
}
- [[maybe_unused]] const auto texceptions = UpdateAttachments();
+ [[maybe_unused]] const auto texceptions = UpdateAttachments(true);
DEBUG_ASSERT(texceptions.none());
SetupImageTransitions(0, color_attachments, zeta_attachment);
@@ -677,9 +720,12 @@ void RasterizerVulkan::FlushWork() {
draw_counter = 0;
}
-RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
+RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments(bool is_clear) {
MICROPROFILE_SCOPE(Vulkan_RenderTargets);
- auto& dirty = system.GPU().Maxwell3D().dirty.flags;
+ auto& maxwell3d = system.GPU().Maxwell3D();
+ auto& dirty = maxwell3d.dirty.flags;
+ auto& regs = maxwell3d.regs;
+
const bool update_rendertargets = dirty[VideoCommon::Dirty::RenderTargets];
dirty[VideoCommon::Dirty::RenderTargets] = false;
@@ -688,7 +734,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
Texceptions texceptions;
for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
if (update_rendertargets) {
- color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true);
+ const bool preserve_contents = HasToPreserveColorContents(is_clear, regs);
+ color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, preserve_contents);
}
if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) {
texceptions[rt] = true;
@@ -696,7 +743,8 @@ RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() {
}
if (update_rendertargets) {
- zeta_attachment = texture_cache.GetDepthBufferSurface(true);
+ const bool preserve_contents = HasToPreserveDepthContents(is_clear, regs);
+ zeta_attachment = texture_cache.GetDepthBufferSurface(preserve_contents);
}
if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) {
texceptions[ZETA_TEXCEPTION_INDEX] = true;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index c8c187606..83e00e7e9 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -159,7 +159,10 @@ private:
void FlushWork();
- Texceptions UpdateAttachments();
+ /// @brief Updates the currently bound attachments
+ /// @param is_clear True when the framebuffer is updated as a clear
+ /// @return Bitfield of attachments being used as sampled textures
+ Texceptions UpdateAttachments(bool is_clear);
std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass);
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 82ec9180e..56524e6f3 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -9,6 +9,7 @@
#include <utility>
#include "common/microprofile.h"
+#include "common/thread.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_query_cache.h"
#include "video_core/renderer_vulkan/vk_resource_manager.h"
@@ -133,6 +134,7 @@ void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) {
}
void VKScheduler::WorkerThread() {
+ Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
std::unique_lock lock{mutex};
do {
cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; });
diff --git a/src/video_core/renderer_vulkan/wrapper.cpp b/src/video_core/renderer_vulkan/wrapper.cpp
index 42eff85d3..0d485a662 100644
--- a/src/video_core/renderer_vulkan/wrapper.cpp
+++ b/src/video_core/renderer_vulkan/wrapper.cpp
@@ -153,7 +153,8 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
bool Load(InstanceDispatch& dld) noexcept {
#define X(name) Proc(dld.name, dld, #name)
- return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties);
+ return X(vkCreateInstance) && X(vkEnumerateInstanceExtensionProperties) &&
+ X(vkEnumerateInstanceLayerProperties);
#undef X
}
@@ -770,4 +771,17 @@ std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProp
return properties;
}
+std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties(
+ const InstanceDispatch& dld) {
+ u32 num;
+ if (dld.vkEnumerateInstanceLayerProperties(&num, nullptr) != VK_SUCCESS) {
+ return std::nullopt;
+ }
+ std::vector<VkLayerProperties> properties(num);
+ if (dld.vkEnumerateInstanceLayerProperties(&num, properties.data()) != VK_SUCCESS) {
+ return std::nullopt;
+ }
+ return properties;
+}
+
} // namespace Vulkan::vk
diff --git a/src/video_core/renderer_vulkan/wrapper.h b/src/video_core/renderer_vulkan/wrapper.h
index da42ca88e..d56fdb3f9 100644
--- a/src/video_core/renderer_vulkan/wrapper.h
+++ b/src/video_core/renderer_vulkan/wrapper.h
@@ -141,6 +141,7 @@ struct InstanceDispatch {
PFN_vkCreateInstance vkCreateInstance;
PFN_vkDestroyInstance vkDestroyInstance;
PFN_vkEnumerateInstanceExtensionProperties vkEnumerateInstanceExtensionProperties;
+ PFN_vkEnumerateInstanceLayerProperties vkEnumerateInstanceLayerProperties;
PFN_vkCreateDebugUtilsMessengerEXT vkCreateDebugUtilsMessengerEXT;
PFN_vkCreateDevice vkCreateDevice;
@@ -996,4 +997,7 @@ private:
std::optional<std::vector<VkExtensionProperties>> EnumerateInstanceExtensionProperties(
const InstanceDispatch& dld);
+std::optional<std::vector<VkLayerProperties>> EnumerateInstanceLayerProperties(
+ const InstanceDispatch& dld);
+
} // namespace Vulkan::vk
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 848e46874..b2e88fa20 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -13,55 +13,101 @@
namespace VideoCommon::Shader {
+using std::move;
using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode;
+using Tegra::Shader::PredCondition;
u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
- if (instr.hset2.ftz == 0) {
- LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+ PredCondition cond;
+ bool bf;
+ bool ftz;
+ bool neg_a;
+ bool abs_a;
+ bool neg_b;
+ bool abs_b;
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::HSET2_C:
+ case OpCode::Id::HSET2_IMM:
+ cond = instr.hsetp2.cbuf_and_imm.cond;
+ bf = instr.Bit(53);
+ ftz = instr.Bit(54);
+ neg_a = instr.Bit(43);
+ abs_a = instr.Bit(44);
+ neg_b = instr.Bit(56);
+ abs_b = instr.Bit(54);
+ break;
+ case OpCode::Id::HSET2_R:
+ cond = instr.hsetp2.reg.cond;
+ bf = instr.Bit(49);
+ ftz = instr.Bit(50);
+ neg_a = instr.Bit(43);
+ abs_a = instr.Bit(44);
+ neg_b = instr.Bit(31);
+ abs_b = instr.Bit(30);
+ break;
+ default:
+ UNREACHABLE();
}
- Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
- op_a = GetOperandAbsNegHalf(op_a, instr.hset2.abs_a, instr.hset2.negate_a);
-
- Node op_b = [&]() {
+ Node op_b = [this, instr, opcode] {
switch (opcode->get().GetId()) {
+ case OpCode::Id::HSET2_C:
+ // Inform as unimplemented as this is not tested.
+ UNIMPLEMENTED_MSG("HSET2_C is not implemented");
+ return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
case OpCode::Id::HSET2_R:
return GetRegister(instr.gpr20);
+ case OpCode::Id::HSET2_IMM:
+ return UnpackHalfImmediate(instr, true);
default:
UNREACHABLE();
- return Immediate(0);
+ return Node{};
}
}();
- op_b = UnpackHalfFloat(op_b, instr.hset2.type_b);
- op_b = GetOperandAbsNegHalf(op_b, instr.hset2.abs_b, instr.hset2.negate_b);
- const Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
+ if (!ftz) {
+ LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+ }
+
+ Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
+ op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a);
+
+ switch (opcode->get().GetId()) {
+ case OpCode::Id::HSET2_R:
+ op_b = GetOperandAbsNegHalf(move(op_b), abs_b, neg_b);
+ [[fallthrough]];
+ case OpCode::Id::HSET2_C:
+ op_b = UnpackHalfFloat(move(op_b), instr.hset2.type_b);
+ break;
+ default:
+ break;
+ }
- const Node comparison_pair = GetPredicateComparisonHalf(instr.hset2.cond, op_a, op_b);
+ Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
+
+ Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b);
const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
// HSET2 operates on each half float in the pack.
std::array<Node, 2> values;
for (u32 i = 0; i < 2; ++i) {
- const u32 raw_value = instr.hset2.bf ? 0x3c00 : 0xffff;
- const Node true_value = Immediate(raw_value << (i * 16));
- const Node false_value = Immediate(0);
-
- const Node comparison =
- Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
- const Node predicate = Operation(combiner, comparison, second_pred);
+ const u32 raw_value = bf ? 0x3c00 : 0xffff;
+ Node true_value = Immediate(raw_value << (i * 16));
+ Node false_value = Immediate(0);
+ Node comparison = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(i));
+ Node predicate = Operation(combiner, comparison, second_pred);
values[i] =
- Operation(OperationCode::Select, NO_PRECISE, predicate, true_value, false_value);
+ Operation(OperationCode::Select, predicate, move(true_value), move(false_value));
}
- const Node value = Operation(OperationCode::UBitwiseOr, NO_PRECISE, values[0], values[1]);
- SetRegister(bb, instr.gpr0, value);
+ Node value = Operation(OperationCode::UBitwiseOr, values[0], values[1]);
+ SetRegister(bb, instr.gpr0, move(value));
return pc;
}
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 94d3a6ae5..0caf3b4f0 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -120,6 +120,9 @@ std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap(
}
const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)};
const auto layer{static_cast<u32>(relative_address / layer_size)};
+ if (layer >= params.depth) {
+ return {};
+ }
const GPUVAddr mipmap_address = relative_address - layer_size * layer;
const auto mipmap_it =
Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);