summaryrefslogtreecommitdiffstats
path: root/src/video_core/shader/decode.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/shader/decode.cpp')
-rw-r--r--src/video_core/shader/decode.cpp177
1 files changed, 93 insertions, 84 deletions
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 2c9ff28f2..29c8895c5 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -11,6 +11,7 @@
#include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h"
#include "video_core/engines/shader_header.h"
+#include "video_core/shader/control_flow.h"
#include "video_core/shader/node_helper.h"
#include "video_core/shader/shader_ir.h"
@@ -21,20 +22,6 @@ using Tegra::Shader::OpCode;
namespace {
-/// Merges exit method of two parallel branches.
-constexpr ExitMethod ParallelExit(ExitMethod a, ExitMethod b) {
- if (a == ExitMethod::Undetermined) {
- return b;
- }
- if (b == ExitMethod::Undetermined) {
- return a;
- }
- if (a == b) {
- return a;
- }
- return ExitMethod::Conditional;
-}
-
/**
* Returns whether the instruction at the specified offset is a 'sched' instruction.
* Sched instructions always appear before a sequence of 3 instructions.
@@ -51,85 +38,104 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
void ShaderIR::Decode() {
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
- std::set<u32> labels;
- const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels);
- if (exit_method != ExitMethod::AlwaysEnd) {
- UNREACHABLE_MSG("Program does not always end");
- }
-
- if (labels.empty()) {
- basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)});
+ disable_flow_stack = false;
+ const auto info = ScanFlow(program_code, program_size, main_offset);
+ if (info) {
+ const auto& shader_info = *info;
+ coverage_begin = shader_info.start;
+ coverage_end = shader_info.end;
+ if (shader_info.decompilable) {
+ disable_flow_stack = true;
+ const auto insert_block = ([this](NodeBlock& nodes, u32 label) {
+ if (label == exit_branch) {
+ return;
+ }
+ basic_blocks.insert({label, nodes});
+ });
+ const auto& blocks = shader_info.blocks;
+ NodeBlock current_block;
+ u32 current_label = exit_branch;
+ for (auto& block : blocks) {
+ if (shader_info.labels.count(block.start) != 0) {
+ insert_block(current_block, current_label);
+ current_block.clear();
+ current_label = block.start;
+ }
+ if (!block.ignore_branch) {
+ DecodeRangeInner(current_block, block.start, block.end);
+ InsertControlFlow(current_block, block);
+ } else {
+ DecodeRangeInner(current_block, block.start, block.end + 1);
+ }
+ }
+ insert_block(current_block, current_label);
+ return;
+ }
+ LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method");
+ // we can't decompile it, fallback to standard method
+ for (const auto& block : shader_info.blocks) {
+ basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
+ }
return;
}
+ LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling");
+
+ // Now we need to deal with an undecompilable shader. We need to brute force
+ // a shader that captures every position.
+ coverage_begin = main_offset;
+ const u32 shader_end = static_cast<u32>(program_size / sizeof(u64));
+ coverage_end = shader_end;
+ for (u32 label = main_offset; label < shader_end; label++) {
+ basic_blocks.insert({label, DecodeRange(label, label + 1)});
+ }
+}
- labels.insert(main_offset);
-
- for (const u32 label : labels) {
- const auto next_it = labels.lower_bound(label + 1);
- const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it;
+NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
+ NodeBlock basic_block;
+ DecodeRangeInner(basic_block, begin, end);
+ return basic_block;
+}
- basic_blocks.insert({label, DecodeRange(label, next_label)});
+void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
+ for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
+ pc = DecodeInstr(bb, pc);
}
}
-ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set<u32>& labels) {
- const auto [iter, inserted] =
- exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
- ExitMethod& exit_method = iter->second;
- if (!inserted)
- return exit_method;
-
- for (u32 offset = begin; offset != end && offset != MAX_PROGRAM_LENGTH; ++offset) {
- coverage_begin = std::min(coverage_begin, offset);
- coverage_end = std::max(coverage_end, offset + 1);
-
- const Instruction instr = {program_code[offset]};
- const auto opcode = OpCode::Decode(instr);
- if (!opcode)
- continue;
- switch (opcode->get().GetId()) {
- case OpCode::Id::EXIT: {
- // The EXIT instruction can be predicated, which means that the shader can conditionally
- // end on this instruction. We have to consider the case where the condition is not met
- // and check the exit method of that other basic block.
- using Tegra::Shader::Pred;
- if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
- return exit_method = ExitMethod::AlwaysEnd;
- } else {
- const ExitMethod not_met = Scan(offset + 1, end, labels);
- return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met);
- }
+void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
+ const auto apply_conditions = ([&](const Condition& cond, Node n) -> Node {
+ Node result = n;
+ if (cond.cc != ConditionCode::T) {
+ result = Conditional(GetConditionCode(cond.cc), {result});
}
- case OpCode::Id::BRA: {
- const u32 target = offset + instr.bra.GetBranchTarget();
- labels.insert(target);
- const ExitMethod no_jmp = Scan(offset + 1, end, labels);
- const ExitMethod jmp = Scan(target, end, labels);
- return exit_method = ParallelExit(no_jmp, jmp);
- }
- case OpCode::Id::SSY:
- case OpCode::Id::PBK: {
- // The SSY and PBK use a similar encoding as the BRA instruction.
- UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
- "Constant buffer branching is not supported");
- const u32 target = offset + instr.bra.GetBranchTarget();
- labels.insert(target);
- // Continue scanning for an exit method.
- break;
+ if (cond.predicate != Pred::UnusedIndex) {
+ u32 pred = static_cast<u32>(cond.predicate);
+ const bool is_neg = pred > 7;
+ if (is_neg) {
+ pred -= 8;
+ }
+ result = Conditional(GetPredicate(pred, is_neg), {result});
}
- default:
- break;
+ return result;
+ });
+ if (block.branch.address < 0) {
+ if (block.branch.kills) {
+ Node n = Operation(OperationCode::Discard);
+ n = apply_conditions(block.branch.cond, n);
+ bb.push_back(n);
+ global_code.push_back(n);
+ return;
}
+ Node n = Operation(OperationCode::Exit);
+ n = apply_conditions(block.branch.cond, n);
+ bb.push_back(n);
+ global_code.push_back(n);
+ return;
}
- return exit_method = ExitMethod::AlwaysReturn;
-}
-
-NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
- NodeBlock basic_block;
- for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
- pc = DecodeInstr(basic_block, pc);
- }
- return basic_block;
+ Node n = Operation(OperationCode::Branch, Immediate(block.branch.address));
+ n = apply_conditions(block.branch.cond, n);
+ bb.push_back(n);
+ global_code.push_back(n);
}
u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
@@ -140,15 +146,18 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
+ const u32 nv_address = ConvertAddressToNvidiaSpace(pc);
// Decoding failure
if (!opcode) {
UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
+ bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})",
+ nv_address, instr.value)));
return pc + 1;
}
- bb.push_back(
- Comment(fmt::format("{}: {} (0x{:016x})", pc, opcode->get().GetName(), instr.value)));
+ bb.push_back(Comment(
+ fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value)));
using Tegra::Shader::Pred;
UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,