summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/video_core/CMakeLists.txt 2
-rw-r--r-- src/video_core/engines/shader_bytecode.h 12
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 3
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 90
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.h 4
-rw-r--r-- src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp 127
-rw-r--r-- src/video_core/renderer_vulkan/vk_staging_buffer_pool.h 83
-rw-r--r-- src/video_core/shader/decode/texture.cpp 108
-rw-r--r-- src/video_core/shader/node.h 1
-rw-r--r-- src/video_core/shader/shader_ir.h 5
10 files changed, 361 insertions, 74 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index da8423214..2594cd0bd 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -171,6 +171,8 @@ if (ENABLE_VULKAN)
renderer_vulkan/vk_scheduler.h
renderer_vulkan/vk_shader_decompiler.cpp
renderer_vulkan/vk_shader_decompiler.h
+ renderer_vulkan/vk_staging_buffer_pool.cpp
+ renderer_vulkan/vk_staging_buffer_pool.h
renderer_vulkan/vk_stream_buffer.cpp
renderer_vulkan/vk_stream_buffer.h
renderer_vulkan/vk_swapchain.cpp
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index dfb12cd2d..412ca5551 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1239,7 +1239,7 @@ union Instruction {
BitField<35, 1, u64> ndv_flag;
BitField<49, 1, u64> nodep_flag;
BitField<50, 1, u64> dc_flag;
- BitField<54, 2, u64> info;
+ BitField<54, 2, u64> offset_mode;
BitField<56, 2, u64> component;
bool UsesMiscMode(TextureMiscMode mode) const {
@@ -1251,9 +1251,9 @@ union Instruction {
case TextureMiscMode::DC:
return dc_flag != 0;
case TextureMiscMode::AOFFI:
- return info == 1;
+ return offset_mode == 1;
case TextureMiscMode::PTP:
- return info == 2;
+ return offset_mode == 2;
default:
break;
}
@@ -1265,7 +1265,7 @@ union Instruction {
BitField<35, 1, u64> ndv_flag;
BitField<49, 1, u64> nodep_flag;
BitField<50, 1, u64> dc_flag;
- BitField<33, 2, u64> info;
+ BitField<33, 2, u64> offset_mode;
BitField<37, 2, u64> component;
bool UsesMiscMode(TextureMiscMode mode) const {
@@ -1277,9 +1277,9 @@ union Instruction {
case TextureMiscMode::DC:
return dc_flag != 0;
case TextureMiscMode::AOFFI:
- return info == 1;
+ return offset_mode == 1;
case TextureMiscMode::PTP:
- return info == 2;
+ return offset_mode == 2;
default:
break;
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index dbb08dd80..672051102 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -271,6 +271,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
case Maxwell::ShaderProgram::Geometry:
shader_program_manager->UseTrivialGeometryShader();
break;
+ case Maxwell::ShaderProgram::Fragment:
+ shader_program_manager->UseTrivialFragmentShader();
+ break;
default:
break;
}
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 0389c2143..a311dbcfe 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -48,10 +48,10 @@ class ExprDecompiler;
enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
-struct TextureAoffi {};
+struct TextureOffset {};
struct TextureDerivates {};
using TextureArgument = std::pair<Type, Node>;
-using TextureIR = std::variant<TextureAoffi, TextureDerivates, TextureArgument>;
+using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>;
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
@@ -1077,7 +1077,7 @@ private:
}
std::string GenerateTexture(Operation operation, const std::string& function_suffix,
- const std::vector<TextureIR>& extras, bool sepparate_dc = false) {
+ const std::vector<TextureIR>& extras, bool separate_dc = false) {
constexpr std::array coord_constructors = {"float", "vec2", "vec3", "vec4"};
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
@@ -1090,10 +1090,12 @@ private:
std::string expr = "texture" + function_suffix;
if (!meta->aoffi.empty()) {
expr += "Offset";
+ } else if (!meta->ptp.empty()) {
+ expr += "Offsets";
}
expr += '(' + GetSampler(meta->sampler) + ", ";
expr += coord_constructors.at(count + (has_array ? 1 : 0) +
- (has_shadow && !sepparate_dc ? 1 : 0) - 1);
+ (has_shadow && !separate_dc ? 1 : 0) - 1);
expr += '(';
for (std::size_t i = 0; i < count; ++i) {
expr += Visit(operation[i]).AsFloat();
@@ -1106,7 +1108,7 @@ private:
expr += ", float(" + Visit(meta->array).AsInt() + ')';
}
if (has_shadow) {
- if (sepparate_dc) {
+ if (separate_dc) {
expr += "), " + Visit(meta->depth_compare).AsFloat();
} else {
expr += ", " + Visit(meta->depth_compare).AsFloat() + ')';
@@ -1118,8 +1120,12 @@ private:
for (const auto& variant : extras) {
if (const auto argument = std::get_if<TextureArgument>(&variant)) {
expr += GenerateTextureArgument(*argument);
- } else if (std::holds_alternative<TextureAoffi>(variant)) {
- expr += GenerateTextureAoffi(meta->aoffi);
+ } else if (std::holds_alternative<TextureOffset>(variant)) {
+ if (!meta->aoffi.empty()) {
+ expr += GenerateTextureAoffi(meta->aoffi);
+ } else if (!meta->ptp.empty()) {
+ expr += GenerateTexturePtp(meta->ptp);
+ }
} else if (std::holds_alternative<TextureDerivates>(variant)) {
expr += GenerateTextureDerivates(meta->derivates);
} else {
@@ -1160,6 +1166,20 @@ private:
return expr;
}
+ std::string ReadTextureOffset(const Node& value) {
+ if (const auto immediate = std::get_if<ImmediateNode>(&*value)) {
+ // Inline the value as an immediate integer in GLSL (AOFFI arguments are required
+ // to be constant by the standard).
+ return std::to_string(static_cast<s32>(immediate->GetValue()));
+ } else if (device.HasVariableAoffi()) {
+ // The device supports variable AOFFI, so emit the offset as a runtime expression.
+ return Visit(value).AsInt();
+ } else {
+ // Insert 0 on devices not supporting variable AOFFI.
+ return "0";
+ }
+ }
+
std::string GenerateTextureAoffi(const std::vector<Node>& aoffi) {
if (aoffi.empty()) {
return {};
@@ -1170,18 +1190,7 @@ private:
expr += '(';
for (std::size_t index = 0; index < aoffi.size(); ++index) {
- const auto operand{aoffi.at(index)};
- if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) {
- // Inline the string as an immediate integer in GLSL (AOFFI arguments are required
- // to be constant by the standard).
- expr += std::to_string(static_cast<s32>(immediate->GetValue()));
- } else if (device.HasVariableAoffi()) {
- // Avoid using variable AOFFI on unsupported devices.
- expr += Visit(operand).AsInt();
- } else {
- // Insert 0 on devices not supporting variable AOFFI.
- expr += '0';
- }
+ expr += ReadTextureOffset(aoffi.at(index));
if (index + 1 < aoffi.size()) {
expr += ", ";
}
@@ -1191,6 +1200,20 @@ private:
return expr;
}
+ std::string GenerateTexturePtp(const std::vector<Node>& ptp) {
+ static constexpr std::size_t num_vectors = 4;
+ ASSERT(ptp.size() == num_vectors * 2);
+
+ std::string expr = ", ivec2[](";
+ for (std::size_t vector = 0; vector < num_vectors; ++vector) {
+ const bool has_next = vector + 1 < num_vectors;
+ expr += fmt::format("ivec2({}, {}){}", ReadTextureOffset(ptp.at(vector * 2)),
+ ReadTextureOffset(ptp.at(vector * 2 + 1)), has_next ? ", " : "");
+ }
+ expr += ')';
+ return expr;
+ }
+
std::string GenerateTextureDerivates(const std::vector<Node>& derivates) {
if (derivates.empty()) {
return {};
@@ -1689,7 +1712,7 @@ private:
ASSERT(meta);
std::string expr = GenerateTexture(
- operation, "", {TextureAoffi{}, TextureArgument{Type::Float, meta->bias}});
+ operation, "", {TextureOffset{}, TextureArgument{Type::Float, meta->bias}});
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
@@ -1701,7 +1724,7 @@ private:
ASSERT(meta);
std::string expr = GenerateTexture(
- operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureAoffi{}});
+ operation, "Lod", {TextureArgument{Type::Float, meta->lod}, TextureOffset{}});
if (meta->sampler.IsShadow()) {
expr = "vec4(" + expr + ')';
}
@@ -1709,21 +1732,19 @@ private:
}
Expression TextureGather(Operation operation) {
- const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
- ASSERT(meta);
+ const auto& meta = std::get<MetaTexture>(operation.GetMeta());
- const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
- if (meta->sampler.IsShadow()) {
- return {GenerateTexture(operation, "Gather", {TextureAoffi{}}, true) +
- GetSwizzle(meta->element),
- Type::Float};
+ const auto type = meta.sampler.IsShadow() ? Type::Float : Type::Int;
+ const bool separate_dc = meta.sampler.IsShadow();
+
+ std::vector<TextureIR> ir;
+ if (meta.sampler.IsShadow()) {
+ ir = {TextureOffset{}};
} else {
- return {GenerateTexture(operation, "Gather",
- {TextureAoffi{}, TextureArgument{type, meta->component}},
- false) +
- GetSwizzle(meta->element),
- Type::Float};
+ ir = {TextureOffset{}, TextureArgument{type, meta.component}};
}
+ return {GenerateTexture(operation, "Gather", ir, separate_dc) + GetSwizzle(meta.element),
+ Type::Float};
}
Expression TextureQueryDimensions(Operation operation) {
@@ -1794,7 +1815,8 @@ private:
const auto meta = std::get_if<MetaTexture>(&operation.GetMeta());
ASSERT(meta);
- std::string expr = GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureAoffi{}});
+ std::string expr =
+ GenerateTexture(operation, "Grad", {TextureDerivates{}, TextureOffset{}});
return {std::move(expr) + GetSwizzle(meta->element), Type::Float};
}
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 3703e7018..478c165ce 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -50,6 +50,10 @@ public:
current_state.geometry_shader = 0;
}
+ void UseTrivialFragmentShader() {
+ current_state.fragment_shader = 0;
+ }
+
private:
struct PipelineState {
bool operator==(const PipelineState& rhs) const {
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
new file mode 100644
index 000000000..171d78afc
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -0,0 +1,127 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "common/bit_util.h"
+#include "common/common_types.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_resource_manager.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
+
+namespace Vulkan {
+
+VKStagingBufferPool::StagingBuffer::StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence,
+ u64 last_epoch)
+ : buffer{std::move(buffer)}, watch{fence}, last_epoch{last_epoch} {}
+
+VKStagingBufferPool::StagingBuffer::StagingBuffer(StagingBuffer&& rhs) noexcept {
+ buffer = std::move(rhs.buffer);
+ watch = std::move(rhs.watch);
+ last_epoch = rhs.last_epoch;
+}
+
+VKStagingBufferPool::StagingBuffer::~StagingBuffer() = default;
+
+VKStagingBufferPool::StagingBuffer& VKStagingBufferPool::StagingBuffer::operator=(
+ StagingBuffer&& rhs) noexcept {
+ buffer = std::move(rhs.buffer);
+ watch = std::move(rhs.watch);
+ last_epoch = rhs.last_epoch;
+ return *this;
+}
+
+VKStagingBufferPool::VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager,
+ VKScheduler& scheduler)
+ : device{device}, memory_manager{memory_manager}, scheduler{scheduler},
+ is_device_integrated{device.IsIntegrated()} {}
+
+VKStagingBufferPool::~VKStagingBufferPool() = default;
+
+VKBuffer& VKStagingBufferPool::GetUnusedBuffer(std::size_t size, bool host_visible) {
+ if (const auto buffer = TryGetReservedBuffer(size, host_visible)) {
+ return *buffer;
+ }
+ return CreateStagingBuffer(size, host_visible);
+}
+
+void VKStagingBufferPool::TickFrame() {
+ ++epoch;
+ current_delete_level = (current_delete_level + 1) % NumLevels;
+
+ ReleaseCache(true);
+ if (!is_device_integrated) {
+ ReleaseCache(false);
+ }
+}
+
+VKBuffer* VKStagingBufferPool::TryGetReservedBuffer(std::size_t size, bool host_visible) {
+ for (auto& entry : GetCache(host_visible)[Common::Log2Ceil64(size)].entries) {
+ if (entry.watch.TryWatch(scheduler.GetFence())) {
+ entry.last_epoch = epoch;
+ return &*entry.buffer;
+ }
+ }
+ return nullptr;
+}
+
+VKBuffer& VKStagingBufferPool::CreateStagingBuffer(std::size_t size, bool host_visible) {
+ const auto usage =
+ vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst |
+ vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eIndexBuffer;
+ const u32 log2 = Common::Log2Ceil64(size);
+ const vk::BufferCreateInfo buffer_ci({}, 1ULL << log2, usage, vk::SharingMode::eExclusive, 0,
+ nullptr);
+ const auto dev = device.GetLogical();
+ auto buffer = std::make_unique<VKBuffer>();
+ buffer->handle = dev.createBufferUnique(buffer_ci, nullptr, device.GetDispatchLoader());
+ buffer->commit = memory_manager.Commit(*buffer->handle, host_visible);
+
+ auto& entries = GetCache(host_visible)[log2].entries;
+ return *entries.emplace_back(std::move(buffer), scheduler.GetFence(), epoch).buffer;
+}
+
+VKStagingBufferPool::StagingBuffersCache& VKStagingBufferPool::GetCache(bool host_visible) {
+ return is_device_integrated || host_visible ? host_staging_buffers : device_staging_buffers;
+}
+
+void VKStagingBufferPool::ReleaseCache(bool host_visible) {
+ auto& cache = GetCache(host_visible);
+ const u64 size = ReleaseLevel(cache, current_delete_level);
+ if (size == 0) {
+ return;
+ }
+}
+
+u64 VKStagingBufferPool::ReleaseLevel(StagingBuffersCache& cache, std::size_t log2) {
+ static constexpr u64 epochs_to_destroy = 180;
+ static constexpr std::size_t deletions_per_tick = 16;
+
+ auto& staging = cache[log2];
+ auto& entries = staging.entries;
+ const std::size_t old_size = entries.size();
+
+ const auto is_deleteable = [this](const auto& entry) {
+ return entry.last_epoch + epochs_to_destroy < epoch && !entry.watch.IsUsed();
+ };
+ const std::size_t begin_offset = staging.delete_index;
+ const std::size_t end_offset = std::min(begin_offset + deletions_per_tick, old_size);
+ const auto begin = std::begin(entries) + begin_offset;
+ const auto end = std::begin(entries) + end_offset;
+ entries.erase(std::remove_if(begin, end, is_deleteable), end);
+
+ const std::size_t new_size = entries.size();
+ staging.delete_index += deletions_per_tick;
+ if (staging.delete_index >= new_size) {
+ staging.delete_index = 0;
+ }
+
+ return (1ULL << log2) * (old_size - new_size);
+}
+
+} // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
new file mode 100644
index 000000000..02310375f
--- /dev/null
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -0,0 +1,83 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <climits>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "common/common_types.h"
+
+#include "video_core/renderer_vulkan/declarations.h"
+#include "video_core/renderer_vulkan/vk_memory_manager.h"
+
+namespace Vulkan {
+
+class VKDevice;
+class VKFenceWatch;
+class VKScheduler;
+
+struct VKBuffer final {
+ UniqueBuffer handle;
+ VKMemoryCommit commit;
+};
+
+class VKStagingBufferPool final {
+public:
+ explicit VKStagingBufferPool(const VKDevice& device, VKMemoryManager& memory_manager,
+ VKScheduler& scheduler);
+ ~VKStagingBufferPool();
+
+ VKBuffer& GetUnusedBuffer(std::size_t size, bool host_visible);
+
+ void TickFrame();
+
+private:
+ struct StagingBuffer final {
+ explicit StagingBuffer(std::unique_ptr<VKBuffer> buffer, VKFence& fence, u64 last_epoch);
+ StagingBuffer(StagingBuffer&& rhs) noexcept;
+ StagingBuffer(const StagingBuffer&) = delete;
+ ~StagingBuffer();
+
+ StagingBuffer& operator=(StagingBuffer&& rhs) noexcept;
+
+ std::unique_ptr<VKBuffer> buffer;
+ VKFenceWatch watch;
+ u64 last_epoch = 0;
+ };
+
+ struct StagingBuffers final {
+ std::vector<StagingBuffer> entries;
+ std::size_t delete_index = 0;
+ };
+
+ static constexpr std::size_t NumLevels = sizeof(std::size_t) * CHAR_BIT;
+ using StagingBuffersCache = std::array<StagingBuffers, NumLevels>;
+
+ VKBuffer* TryGetReservedBuffer(std::size_t size, bool host_visible);
+
+ VKBuffer& CreateStagingBuffer(std::size_t size, bool host_visible);
+
+ StagingBuffersCache& GetCache(bool host_visible);
+
+ void ReleaseCache(bool host_visible);
+
+ u64 ReleaseLevel(StagingBuffersCache& cache, std::size_t log2);
+
+ const VKDevice& device;
+ VKMemoryManager& memory_manager;
+ VKScheduler& scheduler;
+ const bool is_device_integrated;
+
+ StagingBuffersCache host_staging_buffers;
+ StagingBuffersCache device_staging_buffers;
+
+ u64 epoch = 0;
+
+ std::size_t current_delete_level = 0;
+};
+
+} // namespace Vulkan
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index dff01a541..4b14cdf58 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -89,59 +89,62 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
[[fallthrough]];
}
case OpCode::Id::TLD4: {
- ASSERT(instr.tld4.array == 0);
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
"NDV is not implemented");
- UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
- "PTP is not implemented");
-
const auto texture_type = instr.tld4.texture_type.Value();
const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
: instr.tld4.UsesMiscMode(TextureMiscMode::DC);
const bool is_array = instr.tld4.array != 0;
const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI)
: instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
- WriteTexInstructionFloat(
- bb, instr,
- GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless));
+ const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP)
+ : instr.tld4.UsesMiscMode(TextureMiscMode::PTP);
+ WriteTexInstructionFloat(bb, instr,
+ GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi,
+ is_ptp, is_bindless));
break;
}
case OpCode::Id::TLD4S: {
- const bool uses_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI);
- UNIMPLEMENTED_IF_MSG(uses_aoffi, "AOFFI is not implemented");
-
- const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
+ constexpr std::size_t num_coords = 2;
+ const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI);
+ const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
const Node op_a = GetRegister(instr.gpr8);
const Node op_b = GetRegister(instr.gpr20);
// TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
std::vector<Node> coords;
- Node dc_reg;
- if (depth_compare) {
+ std::vector<Node> aoffi;
+ Node depth_compare;
+ if (is_depth_compare) {
// Note: TLD4S coordinate encoding works just like TEXS's
const Node op_y = GetRegister(instr.gpr8.Value() + 1);
coords.push_back(op_a);
coords.push_back(op_y);
- dc_reg = uses_aoffi ? GetRegister(instr.gpr20.Value() + 1) : op_b;
+ if (is_aoffi) {
+ aoffi = GetAoffiCoordinates(op_b, num_coords, true);
+ depth_compare = GetRegister(instr.gpr20.Value() + 1);
+ } else {
+ depth_compare = op_b;
+ }
} else {
+ // There's no depth compare
coords.push_back(op_a);
- if (uses_aoffi) {
- const Node op_y = GetRegister(instr.gpr8.Value() + 1);
- coords.push_back(op_y);
+ if (is_aoffi) {
+ coords.push_back(GetRegister(instr.gpr8.Value() + 1));
+ aoffi = GetAoffiCoordinates(op_b, num_coords, true);
} else {
coords.push_back(op_b);
}
- dc_reg = {};
}
const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
- const SamplerInfo info{TextureType::Texture2D, false, depth_compare};
+ const SamplerInfo info{TextureType::Texture2D, false, is_depth_compare};
const Sampler& sampler = *GetSampler(instr.sampler, info);
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, {}, dc_reg, {}, {}, {}, {}, component, element};
+ MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
@@ -190,7 +193,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
}
for (u32 element = 0; element < values.size(); ++element) {
- MetaTexture meta{*sampler, {}, {}, {}, derivates, {}, {}, {}, element};
+ MetaTexture meta{*sampler, {}, {}, {}, {}, derivates, {}, {}, {}, element};
values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
}
@@ -230,7 +233,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
if (!instr.txq.IsComponentEnabled(element)) {
continue;
}
- MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, element};
+ MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element};
const Node value =
Operation(OperationCode::TextureQueryDimensions, meta,
GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
@@ -299,7 +302,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
continue;
}
auto params = coords;
- MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, element};
+ MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element};
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
SetTemporary(bb, indexer++, value);
}
@@ -367,7 +370,7 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
if (it != used_samplers.end()) {
ASSERT(!it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&
it->IsShadow() == info.is_shadow && it->IsBuffer() == info.is_buffer);
- return &(*it);
+ return &*it;
}
// Otherwise create a new mapping for this sampler
@@ -397,7 +400,7 @@ const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
if (it != used_samplers.end()) {
ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&
it->IsShadow() == info.is_shadow);
- return &(*it);
+ return &*it;
}
// Otherwise create a new mapping for this sampler
@@ -538,7 +541,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
for (u32 element = 0; element < values.size(); ++element) {
auto copy_coords = coords;
- MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, bias, lod, {}, element};
+ MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element};
values[element] = Operation(read_method, meta, std::move(copy_coords));
}
@@ -635,7 +638,9 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
}
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
- bool is_array, bool is_aoffi, bool is_bindless) {
+ bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) {
+ ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time");
+
const std::size_t coord_count = GetCoordCount(texture_type);
// If enabled arrays index is always stored in the gpr8 field
@@ -661,12 +666,15 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
return values;
}
- std::vector<Node> aoffi;
+ std::vector<Node> aoffi, ptp;
if (is_aoffi) {
aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
+ } else if (is_ptp) {
+ ptp = GetPtpCoordinates(
+ {GetRegister(parameter_register++), GetRegister(parameter_register++)});
}
- Node dc{};
+ Node dc;
if (depth_compare) {
dc = GetRegister(parameter_register++);
}
@@ -676,8 +684,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{*sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, component,
- element};
+ MetaTexture meta{
+ *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element};
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
}
@@ -710,7 +718,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, array_register, {}, {}, {}, {}, lod, {}, element};
+ MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
@@ -760,7 +768,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
Node4 values;
for (u32 element = 0; element < values.size(); ++element) {
auto coords_copy = coords;
- MetaTexture meta{sampler, array, {}, {}, {}, {}, lod, {}, element};
+ MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element};
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
}
return values;
@@ -825,4 +833,38 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor
return aoffi;
}
+std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) {
+ static constexpr u32 num_entries = 8;
+
+ std::vector<Node> ptp;
+ ptp.reserve(num_entries);
+
+ const auto global_size = static_cast<s64>(global_code.size());
+ const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size);
+ const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size);
+ if (!low || !high) {
+ for (u32 entry = 0; entry < num_entries; ++entry) {
+ const u32 reg = entry / 4;
+ const u32 offset = entry % 4;
+ const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6);
+ const Node condition =
+ Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32));
+ const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64));
+ ptp.push_back(Operation(OperationCode::Select, condition, negative, value));
+ }
+ return ptp;
+ }
+
+ const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low);
+ for (u32 entry = 0; entry < num_entries; ++entry) {
+ s32 value = (immediate >> (entry * 8)) & 0b111111;
+ if (value >= 32) {
+ value -= 64;
+ }
+ ptp.push_back(Immediate(value));
+ }
+
+ return ptp;
+}
+
} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index abd40f582..4d2f4d6a8 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -374,6 +374,7 @@ struct MetaTexture {
Node array;
Node depth_compare;
std::vector<Node> aoffi;
+ std::vector<Node> ptp;
std::vector<Node> derivates;
Node bias;
Node lod;
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 04ae5f822..baed06ccd 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -350,7 +350,8 @@ private:
bool is_array);
Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
- bool depth_compare, bool is_array, bool is_aoffi, bool is_bindless);
+ bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp,
+ bool is_bindless);
Node4 GetTldCode(Tegra::Shader::Instruction instr);
@@ -363,6 +364,8 @@ private:
std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
+ std::vector<Node> GetPtpCoordinates(std::array<Node, 2> ptp_regs);
+
Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi,