summaryrefslogtreecommitdiffstats
path: root/src/video_core
diff options
context:
space:
mode:
authorFernando Sahmkow <fsahmkow27@gmail.com>2022-11-09 17:58:10 +0100
committerFernando Sahmkow <fsahmkow27@gmail.com>2023-01-01 22:43:57 +0100
commitaad0cbf024fb8077a9b375a093c60a7e2ab1db3d (patch)
tree8c6a86c92ed8cedbafb5f34dd9f72283eaaf4342 /src/video_core
parentMacroHLE: Add Index Buffer size estimation. (diff)
downloadyuzu-aad0cbf024fb8077a9b375a093c60a7e2ab1db3d.tar
yuzu-aad0cbf024fb8077a9b375a093c60a7e2ab1db3d.tar.gz
yuzu-aad0cbf024fb8077a9b375a093c60a7e2ab1db3d.tar.bz2
yuzu-aad0cbf024fb8077a9b375a093c60a7e2ab1db3d.tar.lz
yuzu-aad0cbf024fb8077a9b375a093c60a7e2ab1db3d.tar.xz
yuzu-aad0cbf024fb8077a9b375a093c60a7e2ab1db3d.tar.zst
yuzu-aad0cbf024fb8077a9b375a093c60a7e2ab1db3d.zip
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/engines/maxwell_3d.cpp15
-rw-r--r--src/video_core/engines/maxwell_3d.h17
-rw-r--r--src/video_core/macro/macro_hle.cpp115
-rw-r--r--src/video_core/memory_manager.cpp10
-rw-r--r--src/video_core/memory_manager.h3
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.cpp1
-rw-r--r--src/video_core/renderer_vulkan/fixed_pipeline_state.h1
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp2
-rw-r--r--src/video_core/shader_environment.cpp53
-rw-r--r--src/video_core/shader_environment.h21
10 files changed, 174 insertions, 64 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index a0dd7400d..50d8a94b1 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -182,8 +182,14 @@ u32 Maxwell3D::GetMaxCurrentVertices() {
size_t Maxwell3D::EstimateIndexBufferSize() {
GPUVAddr start_address = regs.index_buffer.StartAddress();
GPUVAddr end_address = regs.index_buffer.EndAddress();
- return std::min<size_t>(memory_manager.GetMemoryLayoutSize(start_address),
- static_cast<size_t>(end_address - start_address));
+ constexpr std::array<size_t, 4> max_sizes = {
+ std::numeric_limits<u8>::max(), std::numeric_limits<u16>::max(),
+ std::numeric_limits<u32>::max(), std::numeric_limits<u32>::max()};
+ const size_t byte_size = regs.index_buffer.FormatSizeInBytes();
+ return std::min<size_t>(
+ memory_manager.GetMemoryLayoutSize(start_address, byte_size * max_sizes[byte_size]) /
+ byte_size,
+ static_cast<size_t>(end_address - start_address));
}
u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
@@ -572,4 +578,9 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
return regs.reg_array[method];
}
+void Maxwell3D::setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name) {
+ const u64 key = (static_cast<u64>(bank) << 32) | offset;
+ replace_table.emplace(key, name);
+}
+
} // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index cfe1e4883..397e88f67 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -3020,6 +3020,23 @@ public:
/// Store temporary hw register values, used by some calls to restore state after a operation
Regs shadow_state;
+ // None Engine
+ enum class EngineHint : u32 {
+ None = 0x0,
+ OnHLEMacro = 0x1,
+ };
+
+ EngineHint engine_state{EngineHint::None};
+
+ enum class HLEReplaceName : u32 {
+ BaseVertex = 0x0,
+ BaseInstance = 0x1,
+ };
+
+ void setHLEReplacementName(u32 bank, u32 offset, HLEReplaceName name);
+
+ std::unordered_map<u64, HLEReplaceName> replace_table;
+
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), "Maxwell3D Regs has wrong size");
static_assert(std::is_trivially_copyable_v<Regs>, "Maxwell3D Regs must be trivially copyable");
diff --git a/src/video_core/macro/macro_hle.cpp b/src/video_core/macro/macro_hle.cpp
index 93b6d42a4..638247e55 100644
--- a/src/video_core/macro/macro_hle.cpp
+++ b/src/video_core/macro/macro_hle.cpp
@@ -14,26 +14,29 @@
#include "video_core/rasterizer_interface.h"
namespace Tegra {
+
+using Maxwell = Engines::Maxwell3D;
+
namespace {
-bool IsTopologySafe(Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology topology) {
+bool IsTopologySafe(Maxwell::Regs::PrimitiveTopology topology) {
switch (topology) {
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Points:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Lines:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineLoop:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStrip:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Triangles:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStrip:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleFan:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LinesAdjacency:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::LineStripAdjacency:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TrianglesAdjacency:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::TriangleStripAdjacency:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Patches:
+ case Maxwell::Regs::PrimitiveTopology::Points:
+ case Maxwell::Regs::PrimitiveTopology::Lines:
+ case Maxwell::Regs::PrimitiveTopology::LineLoop:
+ case Maxwell::Regs::PrimitiveTopology::LineStrip:
+ case Maxwell::Regs::PrimitiveTopology::Triangles:
+ case Maxwell::Regs::PrimitiveTopology::TriangleStrip:
+ case Maxwell::Regs::PrimitiveTopology::TriangleFan:
+ case Maxwell::Regs::PrimitiveTopology::LinesAdjacency:
+ case Maxwell::Regs::PrimitiveTopology::LineStripAdjacency:
+ case Maxwell::Regs::PrimitiveTopology::TrianglesAdjacency:
+ case Maxwell::Regs::PrimitiveTopology::TriangleStripAdjacency:
+ case Maxwell::Regs::PrimitiveTopology::Patches:
return true;
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Quads:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::QuadStrip:
- case Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology::Polygon:
+ case Maxwell::Regs::PrimitiveTopology::Quads:
+ case Maxwell::Regs::PrimitiveTopology::QuadStrip:
+ case Maxwell::Regs::PrimitiveTopology::Polygon:
default:
return false;
}
@@ -82,8 +85,7 @@ public:
: HLEMacroImpl(maxwell3d_), extended(extended_) {}
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
- auto topology =
- static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
+ auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]);
if (!IsTopologySafe(topology)) {
Fallback(parameters);
return;
@@ -99,18 +101,16 @@ public:
params.stride = 0;
if (extended) {
- maxwell3d.CallMethod(0x8e3, 0x640, true);
- maxwell3d.CallMethod(0x8e4, parameters[4], true);
+ maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
+ maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance);
}
maxwell3d.draw_manager->DrawArrayIndirect(topology);
if (extended) {
- maxwell3d.CallMethod(0x8e3, 0x640, true);
- maxwell3d.CallMethod(0x8e4, 0, true);
+ maxwell3d.engine_state = Maxwell::EngineHint::None;
+ maxwell3d.replace_table.clear();
}
- maxwell3d.regs.vertex_buffer.first = 0;
- maxwell3d.regs.vertex_buffer.count = 0;
}
private:
@@ -134,13 +134,18 @@ private:
const u32 base_instance = parameters[4];
if (extended) {
- maxwell3d.CallMethod(0x8e3, 0x640, true);
- maxwell3d.CallMethod(0x8e4, base_instance, true);
+ maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
+ maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseInstance);
}
maxwell3d.draw_manager->DrawArray(
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
vertex_first, vertex_count, base_instance, instance_count);
+
+ if (extended) {
+ maxwell3d.engine_state = Maxwell::EngineHint::None;
+ maxwell3d.replace_table.clear();
+ }
}
bool extended;
@@ -151,8 +156,7 @@ public:
explicit HLE_DrawIndexedIndirect(Engines::Maxwell3D& maxwell3d_) : HLEMacroImpl(maxwell3d_) {}
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
- auto topology =
- static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]);
+ auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[0]);
if (!IsTopologySafe(topology)) {
Fallback(parameters);
return;
@@ -164,16 +168,12 @@ public:
minimum_limit = std::max(parameters[3], minimum_limit);
}
const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize());
- const u32 base_size = std::max(minimum_limit, estimate);
- const u32 element_base = parameters[4];
- const u32 base_instance = parameters[5];
- maxwell3d.regs.index_buffer.first = 0;
- maxwell3d.regs.index_buffer.count = base_size; // Use a fixed size, just for mapping
+ const u32 base_size = std::max<u32>(minimum_limit, estimate);
maxwell3d.regs.draw.topology.Assign(topology);
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
- maxwell3d.CallMethod(0x8e3, 0x640, true);
- maxwell3d.CallMethod(0x8e4, element_base, true);
- maxwell3d.CallMethod(0x8e5, base_instance, true);
+ maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
+ maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
+ maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
auto& params = maxwell3d.draw_manager->GetIndirectParams();
params.is_indexed = true;
params.include_count = false;
@@ -184,9 +184,8 @@ public:
params.stride = 0;
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size);
- maxwell3d.CallMethod(0x8e3, 0x640, true);
- maxwell3d.CallMethod(0x8e4, 0x0, true);
- maxwell3d.CallMethod(0x8e5, 0x0, true);
+ maxwell3d.engine_state = Maxwell::EngineHint::None;
+ maxwell3d.replace_table.clear();
}
private:
@@ -197,18 +196,17 @@ private:
const u32 base_instance = parameters[5];
maxwell3d.regs.vertex_id_base = element_base;
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
- maxwell3d.CallMethod(0x8e3, 0x640, true);
- maxwell3d.CallMethod(0x8e4, element_base, true);
- maxwell3d.CallMethod(0x8e5, base_instance, true);
+ maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
+ maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
+ maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
maxwell3d.draw_manager->DrawIndex(
static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[0]),
parameters[3], parameters[1], element_base, base_instance, instance_count);
maxwell3d.regs.vertex_id_base = 0x0;
- maxwell3d.CallMethod(0x8e3, 0x640, true);
- maxwell3d.CallMethod(0x8e4, 0x0, true);
- maxwell3d.CallMethod(0x8e5, 0x0, true);
+ maxwell3d.engine_state = Maxwell::EngineHint::None;
+ maxwell3d.replace_table.clear();
}
u32 minimum_limit{1 << 18};
@@ -238,8 +236,7 @@ public:
: HLEMacroImpl(maxwell3d_) {}
void Execute(const std::vector<u32>& parameters, [[maybe_unused]] u32 method) override {
- const auto topology =
- static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
+ const auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[2]);
if (!IsTopologySafe(topology)) {
Fallback(parameters);
return;
@@ -277,9 +274,6 @@ public:
}
const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize());
const u32 base_size = std::max(minimum_limit, estimate);
-
- maxwell3d.regs.index_buffer.first = 0;
- maxwell3d.regs.index_buffer.count = std::max(highest_limit, base_size);
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
auto& params = maxwell3d.draw_manager->GetIndirectParams();
params.is_indexed = true;
@@ -290,7 +284,12 @@ public:
params.max_draw_counts = draw_count;
params.stride = stride;
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
- maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, highest_limit);
+ maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
+ maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
+ maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
+ maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size);
+ maxwell3d.engine_state = Maxwell::EngineHint::None;
+ maxwell3d.replace_table.clear();
}
private:
@@ -299,9 +298,8 @@ private:
// Clean everything.
// Clean everything.
maxwell3d.regs.vertex_id_base = 0x0;
- maxwell3d.CallMethod(0x8e3, 0x640, true);
- maxwell3d.CallMethod(0x8e4, 0x0, true);
- maxwell3d.CallMethod(0x8e5, 0x0, true);
+ maxwell3d.engine_state = Maxwell::EngineHint::None;
+ maxwell3d.replace_table.clear();
});
maxwell3d.RefreshParameters();
const u32 start_indirect = parameters[0];
@@ -310,8 +308,7 @@ private:
// Nothing to do.
return;
}
- const auto topology =
- static_cast<Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology>(parameters[2]);
+ const auto topology = static_cast<Maxwell::Regs::PrimitiveTopology>(parameters[2]);
maxwell3d.regs.draw.topology.Assign(topology);
const u32 padding = parameters[3];
const std::size_t max_draws = parameters[4];
@@ -326,9 +323,9 @@ private:
const u32 base_vertex = parameters[base + 3];
const u32 base_instance = parameters[base + 4];
maxwell3d.regs.vertex_id_base = base_vertex;
- maxwell3d.CallMethod(0x8e3, 0x640, true);
- maxwell3d.CallMethod(0x8e4, base_vertex, true);
- maxwell3d.CallMethod(0x8e5, base_instance, true);
+ maxwell3d.engine_state = Maxwell::EngineHint::OnHLEMacro;
+ maxwell3d.setHLEReplacementName(0, 0x640, Maxwell::HLEReplaceName::BaseVertex);
+ maxwell3d.setHLEReplacementName(0, 0x644, Maxwell::HLEReplaceName::BaseInstance);
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.draw_manager->DrawIndex(topology, parameters[base + 2], parameters[base],
base_vertex, base_instance, parameters[base + 1]);
diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp
index 8f6c51045..11e7d225e 100644
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -577,7 +577,7 @@ size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const {
return range_so_far;
}
-size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const {
+size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr, size_t max_size) const {
PTEKind base_kind = GetPageKind(gpu_addr);
if (base_kind == PTEKind::INVALID) {
return 0;
@@ -596,6 +596,10 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const {
return true;
}
range_so_far += copy_amount;
+ if (range_so_far >= max_size) {
+ result = true;
+ return true;
+ }
return false;
};
auto big_check = [&](std::size_t page_index, std::size_t offset, std::size_t copy_amount) {
@@ -605,6 +609,10 @@ size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const {
return true;
}
range_so_far += copy_amount;
+ if (range_so_far >= max_size) {
+ result = true;
+ return true;
+ }
return false;
};
auto check_short_pages = [&](std::size_t page_index, std::size_t offset,
diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h
index 65f6e8134..ca22520d7 100644
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -118,7 +118,8 @@ public:
PTEKind GetPageKind(GPUVAddr gpu_addr) const;
- size_t GetMemoryLayoutSize(GPUVAddr gpu_addr) const;
+ size_t GetMemoryLayoutSize(GPUVAddr gpu_addr,
+ size_t max_size = std::numeric_limits<size_t>::max()) const;
private:
template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index e62b36822..df229f41b 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -97,6 +97,7 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
smooth_lines.Assign(regs.line_anti_alias_enable != 0 ? 1 : 0);
alpha_to_coverage_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_coverage != 0 ? 1 : 0);
alpha_to_one_enabled.Assign(regs.anti_alias_alpha_control.alpha_to_one != 0 ? 1 : 0);
+ app_stage.Assign(maxwell3d.engine_state);
for (size_t i = 0; i < regs.rt.size(); ++i) {
color_formats[i] = static_cast<u8>(regs.rt[i].format);
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index ab79fb8f3..03bf64b57 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -197,6 +197,7 @@ struct FixedPipelineState {
BitField<14, 1, u32> smooth_lines;
BitField<15, 1, u32> alpha_to_coverage_enabled;
BitField<16, 1, u32> alpha_to_one_enabled;
+ BitField<17, 3, Tegra::Engines::Maxwell3D::EngineHint> app_stage;
};
std::array<u8, Maxwell::NumRenderTargets> color_formats;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index e7262420c..58b955821 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -54,7 +54,7 @@ using VideoCommon::FileEnvironment;
using VideoCommon::GenericEnvironment;
using VideoCommon::GraphicsEnvironment;
-constexpr u32 CACHE_VERSION = 8;
+constexpr u32 CACHE_VERSION = 9;
template <typename Container>
auto MakeSpan(Container& container) {
diff --git a/src/video_core/shader_environment.cpp b/src/video_core/shader_environment.cpp
index 958810747..99d85bfb3 100644
--- a/src/video_core/shader_environment.cpp
+++ b/src/video_core/shader_environment.cpp
@@ -202,12 +202,15 @@ void GenericEnvironment::Serialize(std::ofstream& file) const {
const u64 num_texture_types{static_cast<u64>(texture_types.size())};
const u64 num_texture_pixel_formats{static_cast<u64>(texture_pixel_formats.size())};
const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
+ const u64 num_cbuf_replacement_values{static_cast<u64>(cbuf_replacements.size())};
file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
.write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
.write(reinterpret_cast<const char*>(&num_texture_pixel_formats),
sizeof(num_texture_pixel_formats))
.write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
+ .write(reinterpret_cast<const char*>(&num_cbuf_replacement_values),
+ sizeof(num_cbuf_replacement_values))
.write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
.write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
.write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
@@ -229,6 +232,10 @@ void GenericEnvironment::Serialize(std::ofstream& file) const {
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
.write(reinterpret_cast<const char*>(&type), sizeof(type));
}
+ for (const auto& [key, type] : cbuf_replacements) {
+ file.write(reinterpret_cast<const char*>(&key), sizeof(key))
+ .write(reinterpret_cast<const char*>(&type), sizeof(type));
+ }
if (stage == Shader::Stage::Compute) {
file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
.write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size));
@@ -318,6 +325,8 @@ GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
ASSERT(local_size <= std::numeric_limits<u32>::max());
local_memory_size = static_cast<u32>(local_size) + sph.common3.shader_local_memory_crs_size;
texture_bound = maxwell3d->regs.bindless_texture_const_buffer_slot;
+ has_hle_engine_state =
+ maxwell3d->engine_state == Tegra::Engines::Maxwell3D::EngineHint::OnHLEMacro;
}
u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
@@ -331,6 +340,30 @@ u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
return value;
}
+std::optional<Shader::ReplaceConstant> GraphicsEnvironment::GetReplaceConstBuffer(u32 bank,
+ u32 offset) {
+ if (!has_hle_engine_state) {
+ return std::nullopt;
+ }
+ const u64 key = (static_cast<u64>(bank) << 32) | static_cast<u64>(offset);
+ auto it = maxwell3d->replace_table.find(key);
+ if (it == maxwell3d->replace_table.end()) {
+ return std::nullopt;
+ }
+ const auto converted_value = [](Tegra::Engines::Maxwell3D::HLEReplaceName name) {
+ switch (name) {
+ case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseVertex:
+ return Shader::ReplaceConstant::BaseVertex;
+ case Tegra::Engines::Maxwell3D::HLEReplaceName::BaseInstance:
+ return Shader::ReplaceConstant::BaseInstance;
+ default:
+ UNREACHABLE();
+ }
+ }(it->second);
+ cbuf_replacements.emplace(key, converted_value);
+ return converted_value;
+}
+
Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) {
const auto& regs{maxwell3d->regs};
const bool via_header_index{regs.sampler_binding == Maxwell::SamplerBinding::ViaHeaderBinding};
@@ -409,11 +442,14 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
u64 num_texture_types{};
u64 num_texture_pixel_formats{};
u64 num_cbuf_values{};
+ u64 num_cbuf_replacement_values{};
file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
.read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
.read(reinterpret_cast<char*>(&num_texture_pixel_formats),
sizeof(num_texture_pixel_formats))
.read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
+ .read(reinterpret_cast<char*>(&num_cbuf_replacement_values),
+ sizeof(num_cbuf_replacement_values))
.read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
.read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
.read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
@@ -444,6 +480,13 @@ void FileEnvironment::Deserialize(std::ifstream& file) {
.read(reinterpret_cast<char*>(&value), sizeof(value));
cbuf_values.emplace(key, value);
}
+ for (size_t i = 0; i < num_cbuf_replacement_values; ++i) {
+ u64 key;
+ Shader::ReplaceConstant value;
+ file.read(reinterpret_cast<char*>(&key), sizeof(key))
+ .read(reinterpret_cast<char*>(&value), sizeof(value));
+ cbuf_replacements.emplace(key, value);
+ }
if (stage == Shader::Stage::Compute) {
file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
.read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
@@ -512,6 +555,16 @@ std::array<u32, 3> FileEnvironment::WorkgroupSize() const {
return workgroup_size;
}
+std::optional<Shader::ReplaceConstant> FileEnvironment::GetReplaceConstBuffer(u32 bank,
+ u32 offset) {
+ const u64 key = (static_cast<u64>(bank) << 32) | static_cast<u64>(offset);
+ auto it = cbuf_replacements.find(key);
+ if (it == cbuf_replacements.end()) {
+ return std::nullopt;
+ }
+ return it->second;
+}
+
void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
const std::filesystem::path& filename, u32 cache_version) try {
std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app);
diff --git a/src/video_core/shader_environment.h b/src/video_core/shader_environment.h
index 1342fab1e..d75987a52 100644
--- a/src/video_core/shader_environment.h
+++ b/src/video_core/shader_environment.h
@@ -60,6 +60,10 @@ public:
void Serialize(std::ofstream& file) const;
+ bool HasHLEMacroState() const override {
+ return has_hle_engine_state;
+ }
+
protected:
std::optional<u64> TryFindSize();
@@ -73,6 +77,7 @@ protected:
std::unordered_map<u32, Shader::TextureType> texture_types;
std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;
std::unordered_map<u64, u32> cbuf_values;
+ std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements;
u32 local_memory_size{};
u32 texture_bound{};
@@ -89,6 +94,7 @@ protected:
u32 viewport_transform_state = 1;
bool has_unbound_instructions = false;
+ bool has_hle_engine_state = false;
};
class GraphicsEnvironment final : public GenericEnvironment {
@@ -109,6 +115,8 @@ public:
u32 ReadViewportTransformState() override;
+ std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer(u32 bank, u32 offset) override;
+
private:
Tegra::Engines::Maxwell3D* maxwell3d{};
size_t stage_index{};
@@ -131,6 +139,11 @@ public:
u32 ReadViewportTransformState() override;
+ std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer(
+ [[maybe_unused]] u32 bank, [[maybe_unused]] u32 offset) override {
+ return std::nullopt;
+ }
+
private:
Tegra::Engines::KeplerCompute* kepler_compute{};
};
@@ -166,6 +179,13 @@ public:
[[nodiscard]] std::array<u32, 3> WorkgroupSize() const override;
+ [[nodiscard]] std::optional<Shader::ReplaceConstant> GetReplaceConstBuffer(u32 bank,
+ u32 offset) override;
+
+ [[nodiscard]] bool HasHLEMacroState() const override {
+ return cbuf_replacements.size() != 0;
+ }
+
void Dump(u64 hash) override;
private:
@@ -173,6 +193,7 @@ private:
std::unordered_map<u32, Shader::TextureType> texture_types;
std::unordered_map<u32, Shader::TexturePixelFormat> texture_pixel_formats;
std::unordered_map<u64, u32> cbuf_values;
+ std::unordered_map<u64, Shader::ReplaceConstant> cbuf_replacements;
std::array<u32, 3> workgroup_size{};
u32 local_memory_size{};
u32 shared_memory_size{};