summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp127
-rw-r--r--src/shader_recompiler/frontend/maxwell/structured_control_flow.h9
-rw-r--r--src/shader_recompiler/frontend/maxwell/translate_program.cpp2
-rw-r--r--src/shader_recompiler/host_translate_info.h5
-rw-r--r--src/video_core/renderer_opengl/gl_device.h4
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp1
-rw-r--r--src/video_core/renderer_vulkan/vk_pipeline_cache.cpp2
7 files changed, 140 insertions, 10 deletions
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
index 8b3e0a15c..69eeaa3e6 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.cpp
@@ -20,6 +20,7 @@
#include "shader_recompiler/frontend/maxwell/decode.h"
#include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
#include "shader_recompiler/frontend/maxwell/translate/translate.h"
+#include "shader_recompiler/host_translate_info.h"
#include "shader_recompiler/object_pool.h"
namespace Shader::Maxwell {
@@ -652,7 +653,7 @@ class TranslatePass {
public:
TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt,
- IR::AbstractSyntaxList& syntax_list_)
+ IR::AbstractSyntaxList& syntax_list_, const HostTranslateInfo& host_info)
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_},
syntax_list{syntax_list_} {
Visit(root_stmt, nullptr, nullptr);
@@ -660,6 +661,9 @@ public:
IR::Block& first_block{*syntax_list.front().data.block};
IR::IREmitter ir(first_block, first_block.begin());
ir.Prologue();
+ if (uses_demote_to_helper && host_info.needs_demote_reorder) {
+ DemoteCombinationPass();
+ }
}
private:
@@ -809,7 +813,14 @@ private:
}
case StatementType::Return: {
ensure_block();
- IR::IREmitter{*current_block}.Epilogue();
+ IR::Block* return_block{block_pool.Create(inst_pool)};
+ IR::IREmitter{*return_block}.Epilogue();
+ current_block->AddBranch(return_block);
+
+ auto& merge{syntax_list.emplace_back()};
+ merge.type = IR::AbstractSyntaxNode::Type::Block;
+ merge.data.block = return_block;
+
current_block = nullptr;
syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return;
break;
@@ -824,6 +835,7 @@ private:
auto& merge{syntax_list.emplace_back()};
merge.type = IR::AbstractSyntaxNode::Type::Block;
merge.data.block = demote_block;
+ uses_demote_to_helper = true;
break;
}
case StatementType::Unreachable: {
@@ -855,11 +867,117 @@ private:
return block_pool.Create(inst_pool);
}
+ void DemoteCombinationPass() {
+ using Type = IR::AbstractSyntaxNode::Type;
+ std::vector<IR::Block*> demote_blocks;
+ std::vector<IR::U1> demote_conds;
+ u32 num_epilogues{};
+ u32 branch_depth{};
+ for (const IR::AbstractSyntaxNode& node : syntax_list) {
+ if (node.type == Type::If) {
+ ++branch_depth;
+ }
+ if (node.type == Type::EndIf) {
+ --branch_depth;
+ }
+ if (node.type != Type::Block) {
+ continue;
+ }
+ if (branch_depth > 1) {
+ // Skip reordering nested demote branches.
+ continue;
+ }
+ for (const IR::Inst& inst : node.data.block->Instructions()) {
+ const IR::Opcode op{inst.GetOpcode()};
+ if (op == IR::Opcode::DemoteToHelperInvocation) {
+ demote_blocks.push_back(node.data.block);
+ break;
+ }
+ if (op == IR::Opcode::Epilogue) {
+ ++num_epilogues;
+ }
+ }
+ }
+ if (demote_blocks.size() == 0) {
+ return;
+ }
+ if (num_epilogues > 1) {
+ LOG_DEBUG(Shader, "Combining demotes with more than one return is not implemented.");
+ return;
+ }
+ s64 last_iterator_offset{};
+ auto& asl{syntax_list};
+ for (const IR::Block* demote_block : demote_blocks) {
+ const auto start_it{asl.begin() + last_iterator_offset};
+ auto asl_it{std::find_if(start_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
+ return asn.type == Type::If && asn.data.if_node.body == demote_block;
+ })};
+ if (asl_it == asl.end()) {
+ // Demote without a conditional branch.
+ // No need to proceed since all fragment instances will be demoted regardless.
+ return;
+ }
+ const IR::Block* const end_if = asl_it->data.if_node.merge;
+ demote_conds.push_back(asl_it->data.if_node.cond);
+ last_iterator_offset = std::distance(asl.begin(), asl_it);
+
+ asl_it = asl.erase(asl_it);
+ asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
+ return asn.type == Type::Block && asn.data.block == demote_block;
+ });
+
+ asl_it = asl.erase(asl_it);
+ asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) {
+ return asn.type == Type::EndIf && asn.data.end_if.merge == end_if;
+ });
+ asl_it = asl.erase(asl_it);
+ }
+ const auto epilogue_func{[](const IR::AbstractSyntaxNode& asn) {
+ if (asn.type != Type::Block) {
+ return false;
+ }
+ for (const auto& inst : asn.data.block->Instructions()) {
+ if (inst.GetOpcode() == IR::Opcode::Epilogue) {
+ return true;
+ }
+ }
+ return false;
+ }};
+ const auto reverse_it{std::find_if(asl.rbegin(), asl.rend(), epilogue_func)};
+ const auto return_block_it{(reverse_it + 1).base()};
+
+ IR::IREmitter ir{*(return_block_it - 1)->data.block};
+ IR::U1 cond(IR::Value(false));
+ for (const auto& demote_cond : demote_conds) {
+ cond = ir.LogicalOr(cond, demote_cond);
+ }
+ cond.Inst()->DestructiveAddUsage(1);
+
+ IR::AbstractSyntaxNode demote_if_node{};
+ demote_if_node.type = Type::If;
+ demote_if_node.data.if_node.cond = cond;
+ demote_if_node.data.if_node.body = demote_blocks[0];
+ demote_if_node.data.if_node.merge = return_block_it->data.block;
+
+ IR::AbstractSyntaxNode demote_node{};
+ demote_node.type = Type::Block;
+ demote_node.data.block = demote_blocks[0];
+
+ IR::AbstractSyntaxNode demote_endif_node{};
+ demote_endif_node.type = Type::EndIf;
+ demote_endif_node.data.end_if.merge = return_block_it->data.block;
+
+ asl.insert(return_block_it, demote_endif_node);
+ asl.insert(return_block_it, demote_node);
+ asl.insert(return_block_it, demote_if_node);
+ }
+
ObjectPool<Statement>& stmt_pool;
ObjectPool<IR::Inst>& inst_pool;
ObjectPool<IR::Block>& block_pool;
Environment& env;
IR::AbstractSyntaxList& syntax_list;
+ bool uses_demote_to_helper{};
// TODO: C++20 Remove this when all compilers support constexpr std::vector
#if __cpp_lib_constexpr_vector >= 201907
@@ -871,12 +989,13 @@ private:
} // Anonymous namespace
IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
- Environment& env, Flow::CFG& cfg) {
+ Environment& env, Flow::CFG& cfg,
+ const HostTranslateInfo& host_info) {
ObjectPool<Statement> stmt_pool{64};
GotoPass goto_pass{cfg, stmt_pool};
Statement& root{goto_pass.RootStatement()};
IR::AbstractSyntaxList syntax_list;
- TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list};
+ TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list, host_info};
return syntax_list;
}
diff --git a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
index 88b083649..e38158da3 100644
--- a/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
+++ b/src/shader_recompiler/frontend/maxwell/structured_control_flow.h
@@ -11,10 +11,13 @@
#include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/object_pool.h"
-namespace Shader::Maxwell {
+namespace Shader {
+struct HostTranslateInfo;
+namespace Maxwell {
[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, Environment& env,
- Flow::CFG& cfg);
+ Flow::CFG& cfg, const HostTranslateInfo& host_info);
-} // namespace Shader::Maxwell
+} // namespace Maxwell
+} // namespace Shader
diff --git a/src/shader_recompiler/frontend/maxwell/translate_program.cpp b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
index c067d459c..012d55357 100644
--- a/src/shader_recompiler/frontend/maxwell/translate_program.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate_program.cpp
@@ -130,7 +130,7 @@ void AddNVNStorageBuffers(IR::Program& program) {
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) {
IR::Program program;
- program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg);
+ program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info);
program.blocks = GenerateBlocks(program.syntax_list);
program.post_order_blocks = PostOrder(program.syntax_list.front());
program.stage = env.ShaderStage();
diff --git a/src/shader_recompiler/host_translate_info.h b/src/shader_recompiler/host_translate_info.h
index 94a584219..96468b2e7 100644
--- a/src/shader_recompiler/host_translate_info.h
+++ b/src/shader_recompiler/host_translate_info.h
@@ -11,8 +11,9 @@ namespace Shader {
/// Misc information about the host
struct HostTranslateInfo {
- bool support_float16{}; ///< True when the device supports 16-bit floats
- bool support_int64{}; ///< True when the device supports 64-bit integers
+ bool support_float16{}; ///< True when the device supports 16-bit floats
+ bool support_int64{}; ///< True when the device supports 64-bit integers
+ bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
};
} // namespace Shader
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index ee992aed4..de9e41659 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -156,6 +156,10 @@ public:
return shader_backend;
}
+ bool IsAmd() const {
+ return vendor_name == "ATI Technologies Inc.";
+ }
+
private:
static bool TestVariableAoffi();
static bool TestPreciseBug();
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 1f4dda17e..b0e14182e 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -219,6 +219,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
host_info{
.support_float16 = false,
.support_int64 = device.HasShaderInt64(),
+ .needs_demote_reorder = device.IsAmd(),
} {
if (use_asynchronous_shaders) {
workers = CreateWorkers();
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index f316c4f92..31bfbcb06 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -325,6 +325,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw
host_info = Shader::HostTranslateInfo{
.support_float16 = device.IsFloat16Supported(),
.support_int64 = device.IsShaderInt64Supported(),
+ .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR ||
+ driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR,
};
}